Diffstat (limited to 'drivers/gpu')
699 files changed, 23860 insertions, 6433 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 5504721007cc..6f1cf235073d 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -102,10 +102,15 @@ config DRM_KMS_HELPER help CRTC helpers for KMS drivers. +config DRM_DRAW + bool + depends on DRM + config DRM_PANIC bool "Display a user-friendly message when a kernel panic occurs" depends on DRM select FONT_SUPPORT + select DRM_DRAW help Enable a drm panic handler, which will display a user-friendly message when a kernel panic occurs. It's useful when using a user-space @@ -217,77 +222,7 @@ config DRM_CLIENT option. Drivers that support the default clients should select DRM_CLIENT_SELECTION instead. -config DRM_CLIENT_LIB - tristate - depends on DRM - select DRM_KMS_HELPER if DRM_FBDEV_EMULATION - select FB_CORE if DRM_FBDEV_EMULATION - help - This option enables the DRM client library and selects all - modules and components according to the enabled clients. - -config DRM_CLIENT_SELECTION - tristate - depends on DRM - select DRM_CLIENT_LIB if DRM_FBDEV_EMULATION - help - Drivers that support in-kernel DRM clients have to select this - option. - -config DRM_CLIENT_SETUP - bool - depends on DRM_CLIENT_SELECTION - help - Enables the DRM client selection. DRM drivers that support the - default clients should select DRM_CLIENT_SELECTION instead. - -menu "Supported DRM clients" - depends on DRM_CLIENT_SELECTION - -config DRM_FBDEV_EMULATION - bool "Enable legacy fbdev support for your modesetting driver" - depends on DRM_CLIENT_SELECTION - select DRM_CLIENT - select DRM_CLIENT_SETUP - select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE - default FB - help - Choose this option if you have a need for the legacy fbdev - support. Note that this support also provides the linux console - support on top of your modesetting driver. - - If in doubt, say "Y". - -config DRM_FBDEV_OVERALLOC - int "Overallocation of the fbdev buffer" - depends on DRM_FBDEV_EMULATION - default 100 - help - Defines the fbdev buffer overallocation in percent. Default - is 100. Typical values for double buffering will be 200, - triple buffering 300. - -config DRM_FBDEV_LEAK_PHYS_SMEM - bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)" - depends on DRM_FBDEV_EMULATION && EXPERT - default n - help - In order to keep user-space compatibility, we want in certain - use-cases to keep leaking the fbdev physical address to the - user-space program handling the fbdev buffer. - This affects, not only, Amlogic, Allwinner or Rockchip devices - with ARM Mali GPUs using an userspace Blob. - This option is not supported by upstream developers and should be - removed as soon as possible and be considered as a broken and - legacy behaviour from a modern fbdev device driver. - - Please send any bug reports when using this to your proprietary - software vendor that requires this. - - If in doubt, say "N" or spread the word to your closed source - library vendor. 
- -endmenu +source "drivers/gpu/drm/clients/Kconfig" config DRM_LOAD_EDID_FIRMWARE bool "Allow to specify an EDID data set instead of probing for it" @@ -526,6 +461,10 @@ config DRM_HYPERV config DRM_EXPORT_FOR_TESTS bool +# Separate option as not all DRM drivers use it +config DRM_PANEL_BACKLIGHT_QUIRKS + tristate + config DRM_LIB_RANDOM bool default n diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 463afad1b5ca..19fb370fbc56 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -91,10 +91,12 @@ drm-$(CONFIG_DRM_PRIVACY_SCREEN) += \ drm_privacy_screen_x86.o drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o drm-$(CONFIG_DRM_PANIC) += drm_panic.o +drm-$(CONFIG_DRM_DRAW) += drm_draw.o drm-$(CONFIG_DRM_PANIC_SCREEN_QR_CODE) += drm_panic_qr.o obj-$(CONFIG_DRM) += drm.o obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o +obj-$(CONFIG_DRM_PANEL_BACKLIGHT_QUIRKS) += drm_panel_backlight_quirks.o # # Memory-management helpers @@ -149,14 +151,6 @@ drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o # -# DRM clients -# - -drm_client_lib-y := drm_client_setup.o -drm_client_lib-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_client.o -obj-$(CONFIG_DRM_CLIENT_LIB) += drm_client_lib.o - -# # Drivers and the rest # @@ -165,6 +159,7 @@ obj-y += tests/ obj-$(CONFIG_DRM_MIPI_DBI) += drm_mipi_dbi.o obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o obj-y += arm/ +obj-y += clients/ obj-y += display/ obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_SCHED) += scheduler/ diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 41fa3377d9cf..1a11cab741ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -26,6 +26,7 @@ config DRM_AMDGPU select DRM_BUDDY select DRM_SUBALLOC_HELPER select DRM_EXEC + select DRM_PANEL_BACKLIGHT_QUIRKS # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work # ACPI_VIDEO's dependencies must also be selected. select INPUT if ACPI diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index c7b18c52825d..5b21674b07fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -1,5 +1,5 @@ # -# Copyright 2017 Advanced Micro Devices, Inc. +# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved. 
# # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -105,7 +105,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o # add IH block amdgpu-y += \ @@ -200,6 +200,7 @@ amdgpu-y += \ vcn_v4_0_3.o \ vcn_v4_0_5.o \ vcn_v5_0_0.o \ + vcn_v5_0_1.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ @@ -208,7 +209,8 @@ amdgpu-y += \ jpeg_v4_0.o \ jpeg_v4_0_3.o \ jpeg_v4_0_5.o \ - jpeg_v5_0_0.o + jpeg_v5_0_0.o \ + jpeg_v5_0_1.o # add VPE block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index f44de9d4b6a1..e13fbd974141 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -334,6 +334,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, AMDGPU_INIT_LEVEL_RESET_RECOVERY); dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + /*TBD: Ideally should clear only GFX, SDMA blocks*/ + amdgpu_ras_clear_err_state(tmp_adev); r = aldebaran_mode2_restore_ip(tmp_adev); if (r) goto end; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4653a8d2823a..69895fccb474 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -880,6 +880,7 @@ struct amdgpu_device { bool need_swiotlb; bool accel_working; struct notifier_block acpi_nb; + struct notifier_block pm_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct debugfs_blob_wrapper debugfs_vbios_blob; struct debugfs_blob_wrapper debugfs_discovery_blob; @@ -1174,7 +1175,6 @@ struct amdgpu_device { struct work_struct reset_work; - bool job_hang; bool dc_enabled; /* Mask of active clusters */ uint32_t aid_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 5ef6b745f222..f3289d289913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -71,6 +71,11 @@ struct ras_query_context; #define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE) #define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED) +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */ +#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ +#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ +#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ + enum aca_reg_idx { ACA_REG_IDX_CTL = 0, ACA_REG_IDX_STATUS = 1, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index ec5e0dcf8613..deb0785350e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -140,7 +140,7 @@ static int acp_poweroff(struct generic_pm_domain *genpd) * 2. power off the acp tiles * 3. check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } @@ -157,7 +157,7 @@ static int acp_poweron(struct generic_pm_domain *genpd) * 2. turn on acp clock * 3. 
power on acp tiles */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -236,7 +236,7 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block) ip_block->version->major, ip_block->version->minor); /* -ENODEV means board uses AZ rather than ACP */ if (r == -ENODEV) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } else if (r) { return r; @@ -508,7 +508,7 @@ static int acp_hw_fini(struct amdgpu_ip_block *ip_block) /* return early if no ACP */ if (!adev->acp.acp_genpd) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -565,7 +565,7 @@ static int acp_suspend(struct amdgpu_ip_block *ip_block) /* power up on suspend */ if (!adev->acp.acp_cell) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -575,7 +575,7 @@ static int acp_resume(struct amdgpu_ip_block *ip_block) /* power down again on resume */ if (!adev->acp.acp_cell) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } @@ -584,19 +584,19 @@ static bool acp_is_idle(void *handle) return true; } -static int acp_set_clockgating_state(void *handle, +static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int acp_set_powergating_state(void *handle, +static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 3afcd1e8aa54..2e5732dfd425 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -368,7 +368,7 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; - amdgpu_bo_reserve(*bo, true); + (void)amdgpu_bo_reserve(*bo, true); amdgpu_bo_kunmap(*bo); amdgpu_bo_unpin(*bo); amdgpu_bo_unreserve(*bo); @@ -724,7 +724,9 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) /* Disable GFXOFF and PG. Temporary workaround * to fix some compute applications issue on GFX9. 
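For readability, here is the new callback shape from the ACP hunk above, reconstructed as it reads after the patch: the void *handle entry point becomes an amdgpu_ip_block one, and amdgpu_dpm_set_powergating_by_smu() now takes a trailing instance argument (0 for the single ACP instance; the meaning of that argument is inferred from the calls in this series).

static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block,
				     enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	bool enable = (state == AMD_PG_STATE_GATE);

	/* trailing 0 is the new per-instance argument added by this series */
	amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0);

	return 0;
}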
*/ - adev->ip_blocks[AMD_IP_BLOCK_TYPE_GFX].version->funcs->set_powergating_state((void *)adev, state); + struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (gfx_block != NULL) + gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state); } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, @@ -834,7 +836,7 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!kiq_ring->sched.ready || adev->job_hang) + if (!kiq_ring->sched.ready || amdgpu_in_reset(adev)) return 0; ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4b80ad860639..8af67f18500a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -433,6 +433,9 @@ void kgd2kfd_unlock_kfd(void); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); +bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, + bool retry_fault); + #else static inline int kgd2kfd_init(void) { @@ -518,5 +521,12 @@ static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { return false; } + +static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, + bool retry_fault) +{ + return false; +} + #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f30548f4c3b3..1e998f972c30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -730,7 +730,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, return; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); @@ -779,7 +779,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, } amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? 
DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -989,7 +989,7 @@ unwind: if (!attachment[i]) continue; if (attachment[i]->bo_va) { - amdgpu_bo_reserve(bo[i], true); + (void)amdgpu_bo_reserve(bo[i], true); if (--attachment[i]->bo_va->ref_count == 0) amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); @@ -1259,11 +1259,11 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem, return -EBUSY; } - amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va); - amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); + (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(sync, bo_va->last_pt_update); + (void)amdgpu_sync_fence(sync, bo_va->last_pt_update); return 0; } @@ -2352,7 +2352,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; - amdgpu_bo_reserve(bo, true); + (void)amdgpu_bo_reserve(bo, true); amdgpu_bo_kunmap(bo); amdgpu_bo_unpin(bo); amdgpu_bo_unreserve(bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 45affc02548c..423fd2eebe1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -47,35 +47,37 @@ /* Check if current bios is an ATOM BIOS. * Return true if it is ATOM BIOS. Otherwise, return false. */ -static bool check_atom_bios(uint8_t *bios, size_t size) +static bool check_atom_bios(struct amdgpu_device *adev, size_t size) { uint16_t tmp, bios_header_start; + uint8_t *bios = adev->bios; if (!bios || size < 0x49) { - DRM_INFO("vbios mem is null or mem size is wrong\n"); + dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n"); return false; } if (!AMD_IS_VALID_VBIOS(bios)) { - DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]); + dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0], + bios[1]); return false; } bios_header_start = bios[0x48] | (bios[0x49] << 8); if (!bios_header_start) { - DRM_INFO("Can't locate bios header\n"); + dev_dbg(adev->dev, "Can't locate VBIOS header\n"); return false; } tmp = bios_header_start + 4; if (size < tmp) { - DRM_INFO("BIOS header is broken\n"); + dev_dbg(adev->dev, "VBIOS header is broken\n"); return false; } if (!memcmp(bios + tmp, "ATOM", 4) || !memcmp(bios + tmp, "MOTA", 4)) { - DRM_DEBUG("ATOMBIOS detected\n"); + dev_dbg(adev->dev, "ATOMBIOS detected\n"); return true; } @@ -118,7 +120,7 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, size); iounmap(bios); - if (!check_atom_bios(adev->bios, size)) { + if (!check_atom_bios(adev, size)) { kfree(adev->bios); return false; } @@ -146,7 +148,7 @@ bool amdgpu_read_bios(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, size); pci_unmap_rom(adev->pdev, bios); - if (!check_atom_bios(adev->bios, size)) { + if (!check_atom_bios(adev, size)) { kfree(adev->bios); return false; } @@ -186,7 +188,7 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev) /* read complete BIOS */ amdgpu_asic_read_bios_from_rom(adev, adev->bios, len); - if (!check_atom_bios(adev->bios, len)) { + if (!check_atom_bios(adev, len)) { kfree(adev->bios); return false; } @@ -216,7 +218,7 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, romlen); iounmap(bios); - if (!check_atom_bios(adev->bios, romlen)) + if (!check_atom_bios(adev, romlen)) goto free_bios; adev->bios_size = romlen; @@ -324,7 +326,7 @@ static bool amdgpu_atrm_get_bios(struct 
amdgpu_device *adev) break; } - if (!check_atom_bios(adev->bios, size)) { + if (!check_atom_bios(adev, size)) { kfree(adev->bios); return false; } @@ -389,7 +391,7 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) vhdr->ImageLength, GFP_KERNEL); - if (!check_atom_bios(adev->bios, vhdr->ImageLength)) { + if (!check_atom_bios(adev, vhdr->ImageLength)) { kfree(adev->bios); return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 16153d275d7a..68bce6a6d09d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -414,7 +414,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, + AMDGPU_UCODE_REQUIRED, + "%s", fw_name); if (err) { DRM_ERROR("Failed to load firmware \"%s\"", fw_name); amdgpu_ucode_release(&adev->pm.fw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d891ab779ca7..5df21529b3b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1801,13 +1801,18 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; + /* Make sure VRAM is allocated contigiously */ (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - for (i = 0; i < (*bo)->placement.num_placement; i++) - (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; + if ((*bo)->tbo.resource->mem_type == TTM_PL_VRAM && + !((*bo)->tbo.resource->placement & TTM_PL_FLAG_CONTIGUOUS)) { + + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; + } return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a68338cb7b4a..49ca8c814455 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -2095,6 +2095,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog) amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm); + amdgpu_debugfs_vcn_sched_mask_init(adev); amdgpu_debugfs_jpeg_sched_mask_init(adev); amdgpu_debugfs_gfx_sched_mask_init(adev); amdgpu_debugfs_compute_sched_mask_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 946c48829f19..824f9da5b6ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -343,11 +343,10 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, coredump->skip_vram_check = skip_vram_check; coredump->reset_vram_lost = vram_lost; - if (job && job->vm) { - struct amdgpu_vm *vm = job->vm; + if (job && job->pasid) { struct amdgpu_task_info *ti; - ti = amdgpu_vm_get_task_info_vm(vm); + ti = amdgpu_vm_get_task_info_pasid(adev, job->pasid); if (ti) { coredump->reset_task_info = *ti; amdgpu_vm_put_task_info(ti); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 96316111300a..36053b3d48b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -145,7 +145,7 @@ const char *amdgpu_asic_name[] = { "LAST", }; -#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM, 0) +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0) /* * Default init level where all blocks are expected to be initialized. This is * the level of initialization expected by default and also after a full reset @@ -199,14 +199,16 @@ void amdgpu_set_init_level(struct amdgpu_device *adev, } static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data); /** * DOC: pcie_replay_count * * The amdgpu driver provides a sysfs API for reporting the total number - * of PCIe replays (NAKs) + * of PCIe replays (NAKs). * The file pcie_replay_count is used for this and returns the total - * number of replays as a sum of the NAKs generated and NAKs received + * number of replays as a sum of the NAKs generated and NAKs received. */ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, @@ -417,6 +419,9 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) { struct amdgpu_device *adev = drm_to_adev(dev); + if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + return false; + if (adev->has_pr3 || ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid())) return true; @@ -429,8 +434,8 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) * @dev: drm_device pointer * * Return: - * 1 if the device supporte BACO; - * 3 if the device support MACO (only works if BACO is supported) + * 1 if the device supports BACO; + * 3 if the device supports MACO (only works if BACO is supported) * otherwise return 0. */ int amdgpu_device_supports_baco(struct drm_device *dev) @@ -577,7 +582,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, } /** - * amdgpu_device_aper_access - access vram by vram aperature + * amdgpu_device_aper_access - access vram by vram aperture * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -668,7 +673,7 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) * here is that the GPU reset is not running on another thread in parallel. * * For this we trylock the read side of the reset semaphore, if that succeeds - * we know that the reset is not running in paralell. + * we know that the reset is not running in parallel. 
* * If the trylock fails we assert that we are either already holding the read * side of the lock or are the reset thread itself and hold the write side of @@ -1399,6 +1404,7 @@ static int amdgpu_device_asic_init(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { amdgpu_psp_wait_for_bootloader(adev); ret = amdgpu_atomfirmware_asic_init(adev, true); @@ -1733,7 +1739,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) uint32_t fw_ver; err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); - /* force vPost if error occured */ + /* force vPost if error occurred */ if (err) return true; @@ -2165,7 +2171,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev, if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) continue; r = adev->ip_blocks[i].version->funcs->set_clockgating_state( - (void *)adev, state); + &adev->ip_blocks[i], state); if (r) DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2199,7 +2205,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, if (!adev->ip_blocks[i].version->funcs->set_powergating_state) continue; r = adev->ip_blocks[i].version->funcs->set_powergating_state( - (void *)adev, state); + &adev->ip_blocks[i], state); if (r) DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2378,7 +2384,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, * the module parameter virtual_display. This feature provides a virtual * display hardware on headless boards or in virtualized environments. * This function parses and validates the configuration string specified by - * the user and configues the virtual display configuration (number of + * the user and configures the virtual display configuration (number of * virtual connectors, crtcs, etc.) specified. */ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) @@ -2441,7 +2447,7 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Parses the asic configuration parameters specified in the gpu info - * firmware and makes them availale to the driver for use in configuring + * firmware and makes them available to the driver for use in configuring * the asic. * Returns 0 on success, -EINVAL on failure. */ @@ -2482,6 +2488,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_gpu_info.bin", chip_name); if (err) { dev_err(adev->dev, @@ -2501,7 +2508,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) le32_to_cpu(hdr->header.ucode_array_offset_bytes)); /* - * Should be droped when DAL no longer needs it. + * Should be dropped when DAL no longer needs it. */ if (adev->asic_type == CHIP_NAVI12) goto parse_soc_bounding_box; @@ -3061,7 +3068,7 @@ init_failed: * * Writes a reset magic value to the gart pointer in VRAM. The driver calls * this function before a GPU reset. If the value is retained after a - * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents. + * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. 
*/ static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) { @@ -3137,7 +3144,7 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, + r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i], state); if (r) { DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", @@ -3174,7 +3181,7 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_powergating_state) { /* enable powergating to save power */ - r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, + r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i], state); if (r) { DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", @@ -3376,7 +3383,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_amdkfd_suspend(adev, false); - /* Workaroud for ASICs need to disable SMC first */ + /* Workaround for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { @@ -3478,7 +3485,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) WARN_ON_ONCE(adev->gfx.gfx_off_state); WARN_ON_ONCE(adev->gfx.gfx_off_req_count); - if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0)) adev->gfx.gfx_off_state = true; } @@ -4306,7 +4313,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* * Reset domain needs to be present early, before XGMI hive discovered - * (if any) and intitialized to use reset sem and in_gpu reset flag + * (if any) and initialized to use reset sem and in_gpu reset flag * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); @@ -4596,6 +4603,11 @@ fence_driver_init: amdgpu_device_check_iommu_direct_map(adev); + adev->pm_nb.notifier_call = amdgpu_device_pm_notifier; + r = register_pm_notifier(&adev->pm_nb); + if (r) + goto failed; + return 0; release_ras_con: @@ -4660,6 +4672,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) drain_workqueue(adev->mman.bdev.wq); adev->shutdown = true; + unregister_pm_notifier(&adev->pm_nb); + /* make sure IB test finished before entering exclusive mode * to avoid preemption on IB test */ @@ -4778,8 +4792,8 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) { int ret; - /* No need to evict vram on APUs for suspend to ram or s2idle */ - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) + /* No need to evict vram on APUs unless going to S4 */ + if (!adev->in_s4 && (adev->flags & AMD_IS_APU)) return 0; ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); @@ -4792,6 +4806,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * Suspend & resume. */ /** + * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events + * @nb: notifier block + * @mode: suspend mode + * @data: data + * + * This function is called when the system is about to suspend or hibernate. 
+ * It is used to evict resources from the device before the system goes to + * sleep while there is still access to swap. + */ +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data) +{ + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); + int r; + + switch (mode) { + case PM_HIBERNATION_PREPARE: + adev->in_s4 = true; + fallthrough; + case PM_SUSPEND_PREPARE: + r = amdgpu_device_evict_resources(adev); + /* + * This is considered non-fatal at this time because + * amdgpu_device_prepare() will also fatally evict resources. + * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 + */ + if (r) + drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + } + + return NOTIFY_DONE; +} + +/** * amdgpu_device_prepare - prepare for device suspend * * @dev: drm dev pointer @@ -4830,7 +4879,7 @@ int amdgpu_device_prepare(struct drm_device *dev) return 0; unprepare: - adev->in_s0ix = adev->in_s3 = false; + adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; return r; } @@ -5181,7 +5230,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - amdgpu_ras_set_fed(adev, false); + amdgpu_ras_clear_err_state(adev); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ @@ -5238,16 +5287,18 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, } /** - * amdgpu_device_has_job_running - check if there is any job in mirror list + * amdgpu_device_has_job_running - check if there is any unfinished job * * @adev: amdgpu_device pointer * - * check if there is any job in mirror list + * check if there is any job running on the device when guest driver receives + * FLR notification from host driver. If there are still jobs running, then + * the guest driver will not respond the FLR reset. Instead, let the job hit + * the timeout and guest driver then issue the reset request. */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) { int i; - struct drm_sched_job *job; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -5255,11 +5306,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) if (!amdgpu_ring_sched_ready(ring)) continue; - spin_lock(&ring->sched.job_list_lock); - job = list_first_entry_or_null(&ring->sched.pending_list, - struct drm_sched_job, list); - spin_unlock(&ring->sched.job_list_lock); - if (job) + if (amdgpu_fence_count_emitted(ring)) return true; } return false; @@ -5484,7 +5531,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) amdgpu_set_init_level(tmp_adev, init_level); if (full_reset) { /* post card */ - amdgpu_ras_set_fed(tmp_adev, false); + amdgpu_ras_clear_err_state(tmp_adev); r = amdgpu_device_asic_init(tmp_adev); if (r) { dev_warn(tmp_adev->dev, "asic atom init failed!"); @@ -5818,6 +5865,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int retry_limit = AMDGPU_MAX_RETRY_LIMIT; /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. 
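The notifier added above is wired up with the generic kernel PM-notifier machinery (register_pm_notifier() in amdgpu_device_init(), unregister_pm_notifier() in amdgpu_device_fini_hw()). A minimal sketch of that pattern, with hypothetical mydrv_* names, assuming only <linux/suspend.h> and <linux/notifier.h>:

#include <linux/notifier.h>
#include <linux/suspend.h>

static int mydrv_pm_notifier(struct notifier_block *nb, unsigned long mode,
			     void *data)
{
	switch (mode) {
	case PM_HIBERNATION_PREPARE:	/* system is about to hibernate (S4) */
	case PM_SUSPEND_PREPARE:	/* system is about to suspend */
		/* evict or flush device state while swap is still reachable */
		break;
	}
	return NOTIFY_DONE;
}

static struct notifier_block mydrv_pm_nb = {
	.notifier_call = mydrv_pm_notifier,
};

/* probe():  register_pm_notifier(&mydrv_pm_nb);
 * remove(): unregister_pm_notifier(&mydrv_pm_nb);
 */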
+ */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + !amdgpu_sriov_vf(adev) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } + /* * Special case: RAS triggered and full reset isn't supported */ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); @@ -5900,7 +5959,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_amdkfd_pre_reset(tmp_adev, reset_context); /* - * Mark these ASICs to be reseted as untracked first + * Mark these ASICs to be reset as untracked first * And add them back after reset completed */ amdgpu_unregister_gpu_instance(tmp_adev); @@ -6103,7 +6162,7 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * - * Fetchs and stores in the driver the PCIE capabilities (gen speed + * Fetches and stores in the driver the PCIE capabilities (gen speed * and lanes) of the slot the device is in. Handles APUs and * virtualized environments where PCIE config space may not be available. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 1040204ac8b9..949d74eff294 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1,5 +1,5 @@ /* - * Copyright 2018 Advanced Micro Devices, Inc. + * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -104,7 +104,9 @@ #include "smuio_v13_0_6.h" #include "smuio_v14_0_2.h" #include "vcn_v5_0_0.h" +#include "vcn_v5_0_1.h" #include "jpeg_v5_0_0.h" +#include "jpeg_v5_0_1.h" #include "amdgpu_vpe.h" #if defined(CONFIG_DRM_AMD_ISP) @@ -1340,7 +1342,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) */ if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) { - adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = + adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config = ip->revision & 0xc0; adev->vcn.num_vcn_inst++; adev->vcn.inst_mask |= @@ -1705,7 +1707,7 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) * so this won't overflow. 
*/ for (v = 0; v < adev->vcn.num_vcn_inst; v++) { - adev->vcn.vcn_codec_disable_mask[v] = + adev->vcn.inst[v].vcn_codec_disable_mask = le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits); } break; @@ -1836,6 +1838,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); break; case IP_VERSION(10, 1, 10): @@ -1890,6 +1893,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); break; case IP_VERSION(10, 1, 10): @@ -2013,6 +2017,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): @@ -2184,6 +2189,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); break; case IP_VERSION(10, 1, 10): @@ -2238,6 +2244,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(4, 4, 2): case IP_VERSION(4, 4, 5): + case IP_VERSION(4, 4, 4): amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block); break; case IP_VERSION(5, 0, 0): @@ -2361,6 +2368,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block); break; + case IP_VERSION(5, 0, 1): + amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", @@ -2405,6 +2416,7 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): aqua_vanjaram_init_soc_config(adev); break; default: @@ -2652,6 +2664,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): adev->family = AMDGPU_FAMILY_AI; break; case IP_VERSION(9, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b119d27271c1..35c778426a7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -33,6 +33,7 @@ #include "soc15_common.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" +#include "bif/bif_4_1_d.h" #include <asm/div64.h> #include <linux/pci.h> @@ -1788,3 +1789,82 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) return 0; } +/* panic_bo is set in amdgpu_dm_plane_get_scanout_buffer() and only used in amdgpu_dm_set_pixel() + * they are called from the panic handler, and protected by the drm_panic spinlock. 
+ */ +static struct amdgpu_bo *panic_abo; + +/* Use the indirect MMIO to write each pixel to the GPU VRAM, + * This is a simplified version of amdgpu_device_mm_access() + */ +static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb, + unsigned int x, + unsigned int y, + u32 color) +{ + struct amdgpu_res_cursor cursor; + unsigned long offset; + struct amdgpu_bo *abo = panic_abo; + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + uint32_t tmp; + + offset = x * 4 + y * sb->pitch[0]; + amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor); + + tmp = cursor.start >> 31; + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000); + if (tmp != 0xffffffff) + WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); + WREG32_NO_KIQ(mmMM_DATA, color); +} + +int amdgpu_display_get_scanout_buffer(struct drm_plane *plane, + struct drm_scanout_buffer *sb) +{ + struct amdgpu_bo *abo; + struct drm_framebuffer *fb = plane->state->fb; + + if (!fb) + return -EINVAL; + + DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format); + + abo = gem_to_amdgpu_bo(fb->obj[0]); + if (!abo) + return -EINVAL; + + sb->width = fb->width; + sb->height = fb->height; + /* Use the generic linear format, because tiling will be disabled in panic_flush() */ + sb->format = drm_format_info(fb->format->format); + if (!sb->format) + return -EINVAL; + + sb->pitch[0] = fb->pitches[0]; + + if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) { + if (abo->tbo.resource->mem_type != TTM_PL_VRAM) { + drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n"); + return -EINVAL; + } + /* Only handle 32bits format, to simplify mmio access */ + if (fb->format->cpp[0] != 4) { + drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n"); + return -EINVAL; + } + sb->set_pixel = amdgpu_display_set_pixel; + panic_abo = abo; + return 0; + } + if (!abo->kmap.virtual && + ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) { + drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n"); + return -ENOMEM; + } + if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) + iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual); + else + iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 9d19940f73c8..dfa0d642ac16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,6 +23,8 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ +#include <drm/drm_panic.h> + #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) #define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e)) @@ -49,4 +51,7 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier); int amdgpu_display_suspend_helper(struct amdgpu_device *adev); int amdgpu_display_resume_helper(struct amdgpu_device *adev); +int amdgpu_display_get_scanout_buffer(struct drm_plane *plane, + struct drm_scanout_buffer *sb); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 38686203bea6..492b09d84571 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,7 +23,7 @@ */ #include <drm/amdgpu_drm.h> -#include <drm/drm_client_setup.h> 
+#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem.h> @@ -2552,7 +2552,6 @@ static int amdgpu_pmops_freeze(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int r; - adev->in_s4 = true; r = amdgpu_device_suspend(drm_dev, true); adev->in_s4 = false; if (r) @@ -2916,7 +2915,6 @@ static const struct drm_driver amdgpu_kms_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = KMS_DRIVER_MAJOR, .minor = KMS_DRIVER_MINOR, .patchlevel = KMS_DRIVER_PATCHLEVEL, @@ -2940,7 +2938,6 @@ const struct drm_driver amdgpu_partition_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = KMS_DRIVER_MAJOR, .minor = KMS_DRIVER_MINOR, .patchlevel = KMS_DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h index 5bc2cb661af7..2d86cc6f7f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h @@ -40,7 +40,6 @@ #define DRIVER_NAME "amdgpu" #define DRIVER_DESC "AMD GPU" -#define DRIVER_DATE "20150101" extern const struct drm_driver amdgpu_partition_driver; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index ceb5163480f4..09c9194d5bd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -384,7 +384,7 @@ int amdgpu_fru_sysfs_init(struct amdgpu_device *adev) void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev) { - if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info) + if (!adev->fru_info) return; sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h index bc58dca18035..98f3196599ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h @@ -32,7 +32,7 @@ struct amdgpu_fru_info { char product_name[AMDGPU_PRODUCT_NAME_LEN]; char serial[20]; char manufacturer_name[32]; - char fru_id[32]; + char fru_id[50]; }; int amdgpu_fru_get_product_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 69a6b6dba0a5..6d5d81f0dc4e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -515,7 +515,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!kiq_ring->sched.ready || adev->job_hang || amdgpu_in_reset(adev)) + if (!kiq_ring->sched.ready || amdgpu_in_reset(adev)) return 0; spin_lock(&kiq->ring_lock); @@ -567,7 +567,7 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang) + if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev)) return 0; if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { @@ -806,7 +806,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) /* If going to s2idle, no need to wait */ if (adev->in_s0ix) { if (!amdgpu_dpm_set_powergating_by_smu(adev, - AMD_IP_BLOCK_TYPE_GFX, true)) + AMD_IP_BLOCK_TYPE_GFX, true, 0)) adev->gfx.gfx_off_state = true; } else { schedule_delayed_work(&adev->gfx.gfx_off_delay_work, @@ -818,7 +818,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) 
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); if (adev->gfx.gfx_off_state && - !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { + !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) { adev->gfx.gfx_off_state = false; if (adev->gfx.funcs->init_spm_golden) { @@ -1484,6 +1484,24 @@ static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) return 0; } +/** + * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer containing the input data + * @count: The size of the input data + * + * Provides the sysfs interface to manually run a cleaner shader, which is + * used to clear the GPU state between different tasks. Writing a value to the + * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution. + * The value written corresponds to the partition index on multi-partition + * devices. On single-partition devices, the value should be '0'. + * + * The cleaner shader clears the Local Data Store (LDS) and General Purpose + * Registers (GPRs) to ensure data isolation between GPU workloads. + * + * Return: The number of bytes written to the sysfs file. + */ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, struct device_attribute *attr, const char *buf, @@ -1532,6 +1550,19 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, return count; } +/** + * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer to store the output data + * + * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' + * feature for each GPU partition. Reading from the 'enforce_isolation' + * sysfs file returns the isolation settings for all partitions, where '0' + * indicates disabled and '1' indicates enabled. + * + * Return: The number of bytes read from the sysfs file. + */ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, struct device_attribute *attr, char *buf) @@ -1555,6 +1586,20 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, return size; } +/** + * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer containing the input data + * @count: The size of the input data + * + * This function allows control over the 'enforce_isolation' feature, which + * serializes access to the graphics engine. Writing '1' or '0' to the + * 'enforce_isolation' sysfs file enables or disables process isolation for + * each partition. The input should specify the setting for all partitions. + * + * Return: The number of bytes written to the sysfs file. + */ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -1940,6 +1985,17 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) mutex_unlock(&adev->enforce_isolation_mutex); } +/** + * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation + * @adev: amdgpu_device pointer + * @idx: Index of the GPU partition + * + * When kernel submissions come in, the jobs are given a time slice and once + * that time slice is up, if there are KFD user queues active, kernel + * submissions are blocked until KFD has had its time slice. 
Once the KFD time + * slice is up, KFD user queues are preempted and kernel submissions are + * unblocked and allowed to run again. + */ static void amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, u32 idx) @@ -1985,6 +2041,15 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, msleep(GFX_SLICE_PERIOD_MS); } +/** + * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation + * @ring: Pointer to the amdgpu_ring structure + * + * Ring begin_use helper implementation for gfx which serializes access to the + * gfx IP between kernel submission IOCTLs and KFD user queues when isolation + * enforcement is enabled. The kernel submission IOCTLs and KFD user queues + * each get a time slice when both are active. + */ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2012,6 +2077,15 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) mutex_unlock(&adev->enforce_isolation_mutex); } +/** + * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation + * @ring: Pointer to the amdgpu_ring structure + * + * Ring end_use helper implementation for gfx which serializes access to the + * gfx IP between kernel submission IOCTLs and KFD user queues when isolation + * enforcement is enabled. The kernel submission IOCTLs and KFD user queues + * each get a time slice when both are active. + */ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2050,7 +2124,7 @@ static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val) if (!adev) return -ENODEV; - mask = (1 << adev->gfx.num_gfx_rings) - 1; + mask = (1ULL << adev->gfx.num_gfx_rings) - 1; if ((val & mask) == 0) return -EINVAL; @@ -2078,7 +2152,7 @@ static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val) for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { ring = &adev->gfx.gfx_ring[i]; if (ring->sched.ready) - mask |= 1 << i; + mask |= 1ULL << i; } *val = mask; @@ -2120,7 +2194,7 @@ static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val) if (!adev) return -ENODEV; - mask = (1 << adev->gfx.num_compute_rings) - 1; + mask = (1ULL << adev->gfx.num_compute_rings) - 1; if ((val & mask) == 0) return -EINVAL; @@ -2149,7 +2223,7 @@ static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val) for (i = 0; i < adev->gfx.num_compute_rings; ++i) { ring = &adev->gfx.compute_ring[i]; if (ring->sched.ready) - mask |= 1 << i; + mask |= 1ULL << i; } *val = mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 8b512dc28df8..d751995dc131 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, /** * amdgpu_ib_free - free an IB (Indirect Buffer) * - * @adev: amdgpu_device pointer * @ib: IB object to free * @f: the fence SA bo need wait on for the ib alloation * * Free an IB (all asics). 
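The sched-mask hunks above switch the ring-mask arithmetic to 1ULL because the mask itself is u64 while a plain "1 << n" is done in 32-bit int; a tiny sketch of the difference, with a hypothetical ring count:

static u64 ring_mask(unsigned int num_rings)
{
	/* old form (1 << num_rings) - 1 is not well defined once num_rings
	 * reaches 31; the 64-bit shift below is valid up to 63 rings.
	 */
	return (1ULL << num_rings) - 1;
}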
*/ -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f) +void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f) { - amdgpu_sa_bo_free(adev, &ib->sa_bo, f); + amdgpu_sa_bo_free(&ib->sa_bo, f); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f3b0aaf3ebc6..901f8b12c672 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -298,3 +298,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, dw2 = le32_to_cpu(ih->ring[ring_index + 2]); return dw1 | ((u64)(dw2 & 0xffff) << 32); } + +const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) +{ + return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" : + ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? "ih2" : "unknown"; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 508f02eb0cf8..7d4395a5d8ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -110,4 +110,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, signed int offset); +const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 263ce1811cc8..732744488b03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -77,7 +77,8 @@ static int isp_load_fw_by_psp(struct amdgpu_device *adev) sizeof(ucode_prefix)); /* read isp fw */ - r = amdgpu_ucode_request(adev, &adev->isp.fw, "amdgpu/%s.bin", ucode_prefix); + r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL, + "amdgpu/%s.bin", ucode_prefix); if (r) { amdgpu_ucode_release(&adev->isp.fw); return r; @@ -128,13 +129,13 @@ static bool isp_is_idle(void *handle) return true; } -static int isp_set_clockgating_state(void *handle, +static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int isp_set_powergating_state(void *handle, +static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index b9d08bc96581..100f04475943 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -102,8 +102,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - adev->job_hang = true; - /* * Do the coredump immediately after a job timeout to get a very * close dump/snapshot/representation of GPU's current error status @@ -181,7 +179,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: - adev->job_hang = false; drm_dev_exit(idx); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -197,11 +194,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!*job) return -ENOMEM; - /* - * Initialize the scheduler to at least some ring so that we always - * have a pointer to adev. 
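Several hunks in this series thread a new required/optional flag through amdgpu_ucode_request() (ISP above uses AMDGPU_UCODE_OPTIONAL, CGS and MES use AMDGPU_UCODE_REQUIRED). A sketch of the resulting call pattern; adev->foo.fw and the firmware name are hypothetical placeholders, and the flag semantics (must-have vs. tolerated-if-missing) are inferred from the call sites in this diff:

	int r;

	r = amdgpu_ucode_request(adev, &adev->foo.fw,	/* hypothetical fw pointer */
				 AMDGPU_UCODE_REQUIRED,	/* or AMDGPU_UCODE_OPTIONAL */
				 "amdgpu/%s_foo.bin", ucode_prefix);
	if (r)
		amdgpu_ucode_release(&adev->foo.fw);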
- */ - (*job)->base.sched = &adev->rings[0]->sched; (*job)->vm = vm; amdgpu_sync_create(&(*job)->explicit_sync); @@ -255,7 +247,6 @@ void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds, void amdgpu_job_free_resources(struct amdgpu_job *job) { - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); struct dma_fence *f; unsigned i; @@ -268,7 +259,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) f = NULL; for (i = 0; i < job->num_ibs; ++i) - amdgpu_ib_free(ring->adev, &job->ibs[i], f); + amdgpu_ib_free(&job->ibs[i], f); } static void amdgpu_job_free_cb(struct drm_sched_job *s_job) @@ -367,6 +358,13 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); goto error; } + /* + * The VM structure might be released after the VMID is + * assigned, we had multiple problems with people trying to use + * the VM pointer so better set it to NULL. + */ + if (!fence) + job->vm = NULL; } return fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 3eb4a4653fce..d9cb343a8708 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -27,7 +27,8 @@ #include "amdgpu_ras.h" #define AMDGPU_MAX_JPEG_INSTANCES 4 -#define AMDGPU_MAX_JPEG_RINGS 8 +#define AMDGPU_MAX_JPEG_RINGS 10 +#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 59ec20b07a6a..32b27a1658e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -1610,10 +1610,12 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) pipe == AMDGPU_MES_SCHED_PIPE ? "" : "1"); } - r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED, + "%s", fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mes.bin", ucode_prefix); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6852d50caa89..4f057996ef35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -41,6 +41,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_vram_mgr.h" #include "amdgpu_vm.h" +#include "amdgpu_dma_buf.h" /** * DOC: amdgpu_object @@ -324,6 +325,9 @@ error_free: * * Allocates and pins a BO for kernel internal use. * + * This function is exported to allow the V4L2 isp device + * external to drm device to create and access the kernel BO. + * * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. * * Returns: @@ -347,6 +351,76 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, return 0; } +EXPORT_SYMBOL(amdgpu_bo_create_kernel); + +/** + * amdgpu_bo_create_isp_user - create user BO for isp + * + * @adev: amdgpu device object + * @dma_buf: DMABUF handle for isp buffer + * @domain: where to place it + * @bo: used to initialize BOs in structures + * @gpu_addr: GPU addr of the pinned BO + * + * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does + * GART alloc to generate gpu_addr for BO to make it accessible through the + * GART aperture for ISP HW. 
+ * + * This function is exported to allow the V4L2 isp device external to drm device + * to create and access the isp user BO. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_isp_user(struct amdgpu_device *adev, + struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo, + u64 *gpu_addr) + +{ + struct drm_gem_object *gem_obj; + int r; + + gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf); + *bo = gem_to_amdgpu_bo(gem_obj); + if (!(*bo)) { + dev_err(adev->dev, "failed to get valid isp user bo\n"); + return -EINVAL; + } + + r = amdgpu_bo_reserve(*bo, false); + if (r) { + dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r); + return r; + } + + r = amdgpu_bo_pin(*bo, domain); + if (r) { + dev_err(adev->dev, "(%d) isp user bo pin failed\n", r); + goto error_unreserve; + } + + r = amdgpu_ttm_alloc_gart(&(*bo)->tbo); + if (r) { + dev_err(adev->dev, "%p bind failed\n", *bo); + goto error_unpin; + } + + if (!WARN_ON(!gpu_addr)) + *gpu_addr = amdgpu_bo_gpu_offset(*bo); + + amdgpu_bo_unreserve(*bo); + + return 0; + +error_unpin: + amdgpu_bo_unpin(*bo); +error_unreserve: + amdgpu_bo_unreserve(*bo); + amdgpu_bo_unref(bo); + + return r; +} +EXPORT_SYMBOL(amdgpu_bo_create_isp_user); /** * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location @@ -423,6 +497,9 @@ error: * @cpu_addr: pointer to where the BO's CPU memory space address was stored * * unmaps and unpin a BO for kernel internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the kernel BO. */ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr) @@ -447,6 +524,30 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (cpu_addr) *cpu_addr = NULL; } +EXPORT_SYMBOL(amdgpu_bo_free_kernel); + +/** + * amdgpu_bo_free_isp_user - free BO for isp use + * + * @bo: amdgpu isp user BO to free + * + * unpin and unref BO for isp internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the isp user BO. 
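A minimal usage sketch, not part of the patch itself: how a hypothetical external V4L2 ISP caller might pair the two exported helpers. The wrapper names, the GTT placement and the include set are assumptions; only amdgpu_bo_create_isp_user() and amdgpu_bo_free_isp_user() (declared in amdgpu_object.h) come from this change.

/* Hypothetical external ISP-module helpers; only the two amdgpu_bo_*_isp_user
 * calls are defined by the patch. */
static int isp_map_dmabuf(struct amdgpu_device *adev, struct dma_buf *dbuf,
			  struct amdgpu_bo **bo, u64 *gpu_addr)
{
	/* Import the dma-buf, pin it and obtain a GART address usable by the ISP HW. */
	return amdgpu_bo_create_isp_user(adev, dbuf, AMDGPU_GEM_DOMAIN_GTT,
					 bo, gpu_addr);
}

static void isp_unmap_dmabuf(struct amdgpu_bo *bo)
{
	/* Unpins the BO and drops the reference taken at import time. */
	amdgpu_bo_free_isp_user(bo);
}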
+ */ +void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo) +{ + if (bo == NULL) + return; + + if (amdgpu_bo_reserve(bo, true) == 0) { + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); + } + amdgpu_bo_unref(&bo); +} +EXPORT_SYMBOL(amdgpu_bo_free_isp_user); /* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index be6769852ece..ab3fe7b42da7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -260,6 +260,10 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr); +int amdgpu_bo_create_isp_user(struct amdgpu_device *adev, + struct dma_buf *dbuf, u32 domain, + struct amdgpu_bo **bo, + u64 *gpu_addr); int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, uint64_t offset, uint64_t size, struct amdgpu_bo **bo_ptr, void **cpu_addr); @@ -271,6 +275,7 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, struct amdgpu_bo_vm **ubo_ptr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); +void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); @@ -337,8 +342,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, struct drm_suballoc **sa_bo, unsigned int size); -void amdgpu_sa_bo_free(struct amdgpu_device *adev, - struct drm_suballoc **sa_bo, +void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence); #if defined(CONFIG_DEBUG_FS) void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 448f9e742983..3c1b08441a6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -208,6 +208,7 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block) psp->boot_time_tmr = false; fallthrough; case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = false; @@ -359,6 +360,7 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, int i; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) return false; @@ -870,6 +872,7 @@ static bool psp_skip_tmr(struct psp_context *psp) case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): return true; default: @@ -2264,7 +2267,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return -EINVAL; if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA && - ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC) + ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC && + ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2) return -EINVAL; ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context); @@ -2385,6 +2389,15 @@ static int psp_hw_start(struct psp_context *psp) } } + if ((is_psp_fw_valid(psp->spdm_drv)) && + (psp->funcs->bootloader_load_spdm_drv != NULL)) { + ret = 
psp_bootloader_load_spdm_drv(psp); + if (ret) { + dev_err(adev->dev, "PSP load spdm_drv failed!\n"); + return ret; + } + } + if ((is_psp_fw_valid(psp->sos)) && (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); @@ -3289,7 +3302,8 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_0 *asd_hdr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, "amdgpu/%s_asd.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_asd.bin", chip_name); if (err) goto out; @@ -3311,7 +3325,8 @@ int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, "amdgpu/%s_toc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_toc.bin", chip_name); if (err) goto out; @@ -3407,6 +3422,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->ipkeymgr_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_SPDM_DRV: + psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->spdm_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; @@ -3474,7 +3495,8 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3750,7 +3772,8 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta.bin", chip_name); if (err) return err; @@ -3785,7 +3808,8 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, "amdgpu/%s_cap.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL, + "amdgpu/%s_cap.bin", chip_name); if (err) { if (err == -ENODEV) { dev_warn(adev->dev, "cap microcode does not exist, skip\n"); @@ -3849,13 +3873,13 @@ int psp_config_sq_perfmon(struct psp_context *psp, return ret; } -static int psp_set_clockgating_state(void *handle, +static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int psp_set_powergating_state(void *handle, +static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -3908,7 +3932,8 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, if (!drm_dev_enter(ddev, &idx)) return -ENODEV; - ret = amdgpu_ucode_request(adev, &usbc_pd_fw, "amdgpu/%s", buf); + ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s", buf); if (ret) goto fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 
567cb1f924ca..8d5acc415d38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -80,6 +80,7 @@ enum psp_bootloader_cmd { PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, + PSP_BL__LOAD_SPDMDRV = 0x20000000, }; enum psp_ring_type { @@ -120,6 +121,7 @@ struct psp_funcs { int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_ras_drv)(struct psp_context *psp); int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp); + int (*bootloader_load_spdm_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -343,6 +345,7 @@ struct psp_context { struct psp_bin_desc dbg_drv; struct psp_bin_desc ras_drv; struct psp_bin_desc ipkeymgr_drv; + struct psp_bin_desc spdm_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -434,6 +437,9 @@ struct amdgpu_psp_funcs { #define psp_bootloader_load_ipkeymgr_drv(psp) \ ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \ (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0) +#define psp_bootloader_load_spdm_drv(psp) \ + ((psp)->funcs->bootloader_load_spdm_drv ? \ + (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4c9fa24dd972..01c947066a2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -36,6 +36,7 @@ #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "nbio_v4_3.h" +#include "nbif_v6_3_1.h" #include "nbio_v7_9.h" #include "atom.h" #include "amdgpu_reset.h" @@ -192,7 +193,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); + err_data.err_addr_cnt, false); amdgpu_ras_save_bad_pages(adev, NULL); } @@ -2015,6 +2016,7 @@ static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = true; break; @@ -2156,6 +2158,16 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) /* Fatal error events are handled on host side */ if (amdgpu_sriov_vf(adev)) return; + /** + * If the current interrupt is caused by a non-fatal RAS error, skip + * check for fatal error. For fatal errors, FED status of all devices + * in XGMI hive gets set when the first device gets fatal error + * interrupt. The error gets propagated to other devices as well, so + * make sure to ack the interrupt regardless of FED status. 
+ */ + if (!amdgpu_ras_get_fed_status(adev) && + amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY)) + return; if (adev->nbio.ras && adev->nbio.ras->handle_ras_controller_intr_no_bifring) @@ -2185,6 +2197,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * if (ret) return; + amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block); /* both query_poison_status and handle_poison_consumption are optional, * but at least one of them should be implemented if we need poison * consumption handler @@ -2717,40 +2730,192 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, return 0; } +static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev, + struct eeprom_table_record *bps, + struct ras_err_data *err_data) +{ + struct ta_ras_query_address_input addr_in; + uint32_t socket = 0; + int ret = 0; + + if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) + socket = adev->smuio.funcs->get_socket_id(adev); + + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = bps->address; + addr_in.ma.socket_id = socket; + addr_in.ma.ch_inst = bps->mem_channel; + /* tell RAS TA the node instance is not used */ + addr_in.ma.node_inst = TA_RAS_INV_NODE; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + ret = adev->umc.ras->convert_ras_err_addr(adev, err_data, + &addr_in, NULL, false); + + return ret; +} + +static int amdgpu_ras_mca2pa(struct amdgpu_device *adev, + struct eeprom_table_record *bps, + struct ras_err_data *err_data) +{ + struct ta_ras_query_address_input addr_in; + uint32_t die_id, socket = 0; + + if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) + socket = adev->smuio.funcs->get_socket_id(adev); + + /* although die id is gotten from PA in nps1 mode, the id is + * fitable for any nps mode + */ + if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa) + die_id = adev->umc.ras->get_die_id_from_pa(adev, bps->address, + bps->retired_page << AMDGPU_GPU_PAGE_SHIFT); + else + return -EINVAL; + + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = bps->address; + addr_in.ma.ch_inst = bps->mem_channel; + addr_in.ma.umc_inst = bps->mcumc_id; + addr_in.ma.node_inst = die_id; + addr_in.ma.socket_id = socket; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + return adev->umc.ras->convert_ras_err_addr(adev, err_data, + &addr_in, NULL, false); + else + return -EINVAL; +} + /* it deal with vram only. 
*/ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - struct eeprom_table_record *bps, int pages) + struct eeprom_table_record *bps, int pages, bool from_rom) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; + struct ras_err_data err_data; + struct eeprom_table_record *err_rec; + struct amdgpu_ras_eeprom_control *control = + &adev->psp.ras_context.ras->eeprom_control; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; int ret = 0; - uint32_t i; + uint32_t i, j, loop_cnt = 1; + bool find_pages_per_pa = false; if (!con || !con->eh_data || !bps || pages <= 0) return 0; + if (from_rom) { + err_data.err_addr = + kcalloc(adev->umc.retire_unit, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n"); + ret = -ENOMEM; + goto out; + } + + err_rec = err_data.err_addr; + loop_cnt = adev->umc.retire_unit; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + } + mutex_lock(&con->recovery_lock); data = con->eh_data; if (!data) - goto out; + goto free; for (i = 0; i < pages; i++) { - if (amdgpu_ras_check_bad_page_unlock(con, - bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) - continue; + if (from_rom && + control->rec_type == AMDGPU_RAS_EEPROM_REC_MCA) { + if (!find_pages_per_pa) { + if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) { + if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) { + /* may use old RAS TA, use PA to find pages in + * one row + */ + if (amdgpu_umc_pages_in_a_row(adev, &err_data, + bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + goto free; + else + find_pages_per_pa = true; + } else { + /* unsupported cases */ + goto free; + } + } + } else { + if (amdgpu_umc_pages_in_a_row(adev, &err_data, + bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + goto free; + } + } else { + if (from_rom && !find_pages_per_pa) { + if (bps[i].retired_page & UMC_CHANNEL_IDX_V2) { + /* bad page in any NPS mode in eeprom */ + if (amdgpu_ras_mca2pa_by_idx(adev, &bps[i], &err_data)) + goto free; + } else { + /* legacy bad page in eeprom, generated only in + * NPS1 mode + */ + if (amdgpu_ras_mca2pa(adev, &bps[i], &err_data)) { + /* old RAS TA or ASICs which don't support to + * convert addrss via mca address + */ + if (!i && nps == AMDGPU_NPS1_PARTITION_MODE) { + find_pages_per_pa = true; + err_rec = &bps[i]; + loop_cnt = 1; + } else { + /* non-nps1 mode, old RAS TA + * can't support it + */ + goto free; + } + } + } - if (!data->space_left && - amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { - ret = -ENOMEM; - goto out; + if (!find_pages_per_pa) + i += (adev->umc.retire_unit - 1); + } else { + err_rec = &bps[i]; + } } - amdgpu_ras_reserve_page(adev, bps[i].retired_page); + for (j = 0; j < loop_cnt; j++) { + if (amdgpu_ras_check_bad_page_unlock(con, + err_rec[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + continue; + + if (!data->space_left && + amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { + ret = -ENOMEM; + goto free; + } + + amdgpu_ras_reserve_page(adev, err_rec[j].retired_page); - memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps)); - data->count++; - data->space_left--; + memcpy(&data->bps[data->count], &(err_rec[j]), + sizeof(struct eeprom_table_record)); + data->count++; + data->space_left--; + } } + +free: + if (from_rom) + kfree(err_data.err_addr); out: mutex_unlock(&con->recovery_lock); @@ -2768,7 +2933,7 @@ int amdgpu_ras_save_bad_pages(struct 
amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; struct amdgpu_ras_eeprom_control *control; - int save_count; + int save_count, unit_num, bad_page_num, i; if (!con || !con->eh_data) { if (new_cnt) @@ -2780,19 +2945,32 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; - save_count = data->count - control->ras_num_recs; + bad_page_num = control->ras_num_bad_pages; + save_count = data->count - bad_page_num; mutex_unlock(&con->recovery_lock); + unit_num = save_count / adev->umc.retire_unit; if (new_cnt) - *new_cnt = save_count / adev->umc.retire_unit; + *new_cnt = unit_num; /* only new entries are saved */ if (save_count > 0) { - if (amdgpu_ras_eeprom_append(control, - &data->bps[control->ras_num_recs], - save_count)) { - dev_err(adev->dev, "Failed to save EEPROM table data!"); - return -EIO; + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[control->ras_num_recs], + save_count)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } else { + for (i = 0; i < unit_num; i++) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[bad_page_num + i * adev->umc.retire_unit], + 1)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } } dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); @@ -2821,11 +2999,32 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) return -ENOMEM; ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs); - if (ret) + if (ret) { dev_err(adev->dev, "Failed to load EEPROM table records!"); - else - ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs); + } else { + if (control->ras_num_recs > 1 && + adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { + if ((bps[0].address == bps[1].address) && + (bps[0].mem_channel == bps[1].mem_channel)) + control->rec_type = AMDGPU_RAS_EEPROM_REC_PA; + else + control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA; + } + + ret = amdgpu_ras_eeprom_check(control); + if (ret) + goto out; + + /* HW not usable */ + if (amdgpu_ras_is_rma(adev)) { + ret = -EHWPOISON; + goto out; + } + + ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true); + } +out: kfree(bps); return ret; } @@ -3205,31 +3404,36 @@ static int amdgpu_ras_page_retirement_thread(void *param) int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control; int ret; if (!con || amdgpu_sriov_vf(adev)) return 0; - ret = amdgpu_ras_eeprom_init(&con->eeprom_control); - + control = &con->eeprom_control; + ret = amdgpu_ras_eeprom_init(control); if (ret) return ret; - /* HW not usable */ - if (amdgpu_ras_is_rma(adev)) - return -EHWPOISON; + if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr) + control->rec_type = AMDGPU_RAS_EEPROM_REC_PA; + + /* default status is MCA storage */ + if (control->ras_num_recs <= 1 && + adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + control->rec_type = AMDGPU_RAS_EEPROM_REC_MCA; - if (con->eeprom_control.ras_num_recs) { + if (control->ras_num_recs) { ret = amdgpu_ras_load_bad_pages(adev); if (ret) return ret; amdgpu_dpm_send_hbm_bad_pages_num( - adev, con->eeprom_control.ras_num_recs); + adev, control->ras_num_bad_pages); if (con->update_channel_flag == true) { amdgpu_dpm_send_hbm_bad_channel_flag( - adev, 
con->eeprom_control.bad_channel_bitmap); + adev, control->bad_channel_bitmap); con->update_channel_flag = false; } } @@ -3366,6 +3570,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): return true; default: @@ -3378,7 +3583,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): + case IP_VERSION(14, 0, 3): return true; default: return false; @@ -3629,6 +3836,7 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE; break; @@ -3704,7 +3912,19 @@ int amdgpu_ras_init(struct amdgpu_device *adev) * check DF RAS */ adev->nbio.ras = &nbio_v4_3_ras; break; + case IP_VERSION(6, 3, 1): + if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF)) + /* unlike other generation of nbio ras, + * nbif v6_3_1 only support fatal error interrupt + * to inform software that DF is freezed due to + * system fatal error event. driver should not + * enable nbio ras in such case. Instead, + * check DF RAS + */ + adev->nbio.ras = &nbif_v6_3_1_ras; + break; case IP_VERSION(7, 9, 0): + case IP_VERSION(7, 9, 1): if (!adev->gmc.is_app_apu) adev->nbio.ras = &nbio_v7_9_ras; break; @@ -4083,7 +4303,7 @@ bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) if (!ras) return false; - return atomic_read(&ras->fed); + return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); } void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) @@ -4091,8 +4311,48 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) struct amdgpu_ras *ras; ras = amdgpu_ras_get_context(adev); + if (ras) { + if (status) + set_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); + else + clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); + } +} + +void amdgpu_ras_clear_err_state(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); if (ras) - atomic_set(&ras->fed, !!status); + ras->ras_err_state = 0; +} + +void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + set_bit(block, &ras->ras_err_state); +} + +bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) { + if (block == AMDGPU_RAS_BLOCK__ANY) + return (ras->ras_err_state != 0); + else + return test_bit(block, &ras->ras_err_state) || + test_bit(AMDGPU_RAS_BLOCK__LAST, + &ras->ras_err_state); + } + + return false; } static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 6db772ecfee4..82db986c36a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -99,7 +99,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__IH, AMDGPU_RAS_BLOCK__MPIO, - AMDGPU_RAS_BLOCK__LAST + AMDGPU_RAS_BLOCK__LAST, + AMDGPU_RAS_BLOCK__ANY = -1 }; enum amdgpu_ras_mca_block { @@ -482,6 +483,8 @@ struct ras_ecc_err { 
uint64_t ipid; uint64_t addr; uint64_t pa_pfn; + /* save global channel index across all UMC instances */ + uint32_t channel_idx; struct ras_err_pages err_pages; }; @@ -558,8 +561,8 @@ struct amdgpu_ras { struct ras_ecc_log_info umc_ecc_log; struct delayed_work page_retirement_dwork; - /* Fatal error detected flag */ - atomic_t fed; + /* ras errors detected */ + unsigned long ras_err_state; /* RAS event manager */ struct ras_event_manager __event_mgr; @@ -750,7 +753,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - struct eeprom_table_record *bps, int pages); + struct eeprom_table_record *bps, int pages, bool from_rom); int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, unsigned long *new_cnt); @@ -952,6 +955,10 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); +void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, + enum amdgpu_ras_block block); +void amdgpu_ras_clear_err_state(struct amdgpu_device *adev); +bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block); u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index f28f6b4ba765..52c16bfeccaa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -470,9 +470,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) res = __write_table_ras_info(control); control->ras_num_recs = 0; + control->ras_num_bad_pages = 0; control->ras_fri = 0; - amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages); control->bad_channel_bitmap = 0; amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap); @@ -559,7 +560,7 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) { if (amdgpu_bad_page_threshold == -1) { dev_warn(adev->dev, "RAS records:%d exceed threshold:%d", - con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold); + con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold); dev_warn(adev->dev, "But GPU can be operated due to bad_page_threshold = -1.\n"); return false; @@ -621,6 +622,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control)); + struct amdgpu_device *adev = to_amdgpu_device(control); u32 a, b, i; u8 *buf, *pp; int res; @@ -723,6 +725,12 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, control->ras_num_recs = 1 + (control->ras_max_record_count + b - control->ras_fri) % control->ras_max_record_count; + + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) + control->ras_num_bad_pages = control->ras_num_recs; + else + control->ras_num_bad_pages = + control->ras_num_recs * adev->umc.retire_unit; Out: kfree(buf); return res; @@ -740,10 +748,10 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) /* Modify the header if it exceeds. 
*/ if (amdgpu_bad_page_threshold != 0 && - control->ras_num_recs >= ras->bad_page_cnt_threshold) { + control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) { dev_warn(adev->dev, "Saved bad pages %d reaches threshold value %d\n", - control->ras_num_recs, ras->bad_page_cnt_threshold); + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); control->tbl_hdr.header = RAS_TABLE_HDR_BAD; if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) { control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; @@ -798,9 +806,9 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) */ if (amdgpu_bad_page_threshold != 0 && control->tbl_hdr.version == RAS_TABLE_VER_V2_1 && - control->ras_num_recs < ras->bad_page_cnt_threshold) + control->ras_num_bad_pages < ras->bad_page_cnt_threshold) control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold - - control->ras_num_recs) * 100) / + control->ras_num_bad_pages) * 100) / ras->bad_page_cnt_threshold; /* Recalc the checksum. @@ -841,7 +849,7 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_device *adev = to_amdgpu_device(control); - int res; + int res, i; if (!__is_ras_eeprom_supported(adev)) return 0; @@ -855,6 +863,10 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, return -EINVAL; } + /* set the new channel index flag */ + for (i = 0; i < num; i++) + record[i].retired_page |= UMC_CHANNEL_IDX_V2; + mutex_lock(&control->ras_tbl_mutex); res = amdgpu_ras_eeprom_append_table(control, record, num); @@ -864,6 +876,11 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, amdgpu_ras_debugfs_set_ret_size(control); mutex_unlock(&control->ras_tbl_mutex); + + /* clear channel index flag, the flag is only saved on eeprom */ + for (i = 0; i < num; i++) + record[i].retired_page &= ~UMC_CHANNEL_IDX_V2; + return res; } @@ -1373,9 +1390,35 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) } control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); + return 0; +} + +int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int res; + + if (!__is_ras_eeprom_supported(adev)) + return 0; + + /* Verify i2c adapter is initialized */ + if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo) + return -ENOENT; + + if (!__get_eeprom_i2c_addr(adev, control)) + return -EINVAL; + + if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA) + control->ras_num_bad_pages = control->ras_num_recs; + else + control->ras_num_bad_pages = + control->ras_num_recs * adev->umc.retire_unit; + if (hdr->header == RAS_TABLE_HDR_VAL) { DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records", - control->ras_num_recs); + control->ras_num_bad_pages); if (hdr->version == RAS_TABLE_VER_V2_1) { res = __read_table_ras_info(control); @@ -1390,9 +1433,9 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) /* Warn if we are at 90% of the threshold or above */ - if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold) + if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold) dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d", - control->ras_num_recs, + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); } else if (hdr->header == RAS_TABLE_HDR_BAD && 
amdgpu_bad_page_threshold != 0) { @@ -1403,10 +1446,12 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) } res = __verify_ras_table_checksum(control); - if (res) - DRM_ERROR("RAS Table incorrect checksum or error:%d\n", + if (res) { + dev_err(adev->dev, "RAS Table incorrect checksum or error:%d\n", res); - if (ras->bad_page_cnt_threshold > control->ras_num_recs) { + return -EINVAL; + } + if (ras->bad_page_cnt_threshold > control->ras_num_bad_pages) { /* This means that, the threshold was increased since * the last time the system was booted, and now, * ras->bad_page_cnt_threshold - control->num_recs > 0, @@ -1416,13 +1461,13 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) dev_info(adev->dev, "records:%d threshold:%d, resetting " "RAS table header signature", - control->ras_num_recs, + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); res = amdgpu_ras_eeprom_correct_header_tag(control, RAS_TABLE_HDR_VAL); } else { dev_err(adev->dev, "RAS records:%d exceed threshold:%d", - control->ras_num_recs, ras->bad_page_cnt_threshold); + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); if (amdgpu_bad_page_threshold == -1) { dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1."); res = 0; @@ -1431,7 +1476,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) dev_err(adev->dev, "RAS records:%d exceed threshold:%d, " "GPU will not be initialized. Replace this GPU or increase the threshold", - control->ras_num_recs, ras->bad_page_cnt_threshold); + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); } } } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index b9ebda577797..81d55cb7b397 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -43,6 +43,19 @@ enum amdgpu_ras_eeprom_err_type { AMDGPU_RAS_EEPROM_ERR_COUNT, }; +/* + * one UMC MCA address could map to multiply physical address (PA), + * such as 1:16, we use eeprom_table_record.address to store MCA + * address and use eeprom_table_record.retired_page to save PA. + * + * AMDGPU_RAS_EEPROM_REC_PA: one record store one PA + * AMDGPU_RAS_EEPROM_REC_MCA: one record store one MCA address + */ +enum amdgpu_ras_eeprom_rec_type { + AMDGPU_RAS_EEPROM_REC_PA, + AMDGPU_RAS_EEPROM_REC_MCA, +}; + struct amdgpu_ras_eeprom_table_header { uint32_t header; uint32_t version; @@ -82,6 +95,11 @@ struct amdgpu_ras_eeprom_control { */ u32 ras_num_recs; + /* the bad page number is ras_num_recs or + * ras_num_recs * umc.retire_unit + */ + u32 ras_num_bad_pages; + /* First record index to read, 0-based. * Range is [0, num_recs-1]. 
This is * an absolute index, starting right after @@ -102,6 +120,7 @@ struct amdgpu_ras_eeprom_control { /* Record channel info which occurred bad pages */ u32 bad_channel_bitmap; + enum amdgpu_ras_eeprom_rec_type rec_type; }; /* @@ -145,6 +164,8 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); +int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control); + extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index a0acb65f4b40..dabfbdf6f1ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -183,6 +183,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = aldebaran_reset_init(adev); break; @@ -206,6 +207,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = aldebaran_reset_fini(adev); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 36fc9578c53c..dee5a1b4e572 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -462,8 +462,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, enum amdgpu_ib_pool_type pool, struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f); +void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f); int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ibs, struct amdgpu_job *job, struct dma_fence **f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 10df731998b2..39070b2a4c04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -93,8 +93,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, return 0; } -void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo, - struct dma_fence *fence) +void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence) { if (sa_bo == NULL || *sa_bo == NULL) { return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 113f0d242618..632295bf3875 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -219,9 +219,11 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix)); if (instance == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s%d.bin", ucode_prefix, instance); if (err) goto out; @@ -261,6 +263,8 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 4, 
4) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) && adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c8180cad0abd..2d85f1ec3041 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1762,7 +1762,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) if (!adev->bios && (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))) reserve_size = max(reserve_size, (uint32_t)280 << 20); else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 4c7b53648a50..cf700824b960 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1434,6 +1434,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, * * @adev: amdgpu device * @fw: pointer to load firmware to + * @required: whether the firmware is required * @fmt: firmware name format string * @...: variable arguments * @@ -1442,7 +1443,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, * the error code to -ENODEV, so that early_init functions will fail to load. */ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, - const char *fmt, ...) + enum amdgpu_ucode_required required, const char *fmt, ...) { char fname[AMDGPU_UCODE_NAME_MAX]; va_list ap; @@ -1456,16 +1457,24 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, return -EOVERFLOW; } - r = request_firmware(fw, fname, adev->dev); + if (required == AMDGPU_UCODE_REQUIRED) + r = request_firmware(fw, fname, adev->dev); + else { + r = firmware_request_nowarn(fw, fname, adev->dev); + if (r) + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname); + } if (r) return -ENODEV; r = amdgpu_ucode_validate(*fw); - if (r) { + if (r) + /* + * The amdgpu_ucode_request() should be paired with amdgpu_ucode_release() + * regardless of success/failure, and the amdgpu_ucode_release() takes care of + * firmware release and need to avoid redundant release FW operation here. 
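A minimal sketch of the calling convention described in the comment above, assuming a hypothetical caller and firmware name; only the AMDGPU_UCODE_OPTIONAL/REQUIRED flag and the unconditional amdgpu_ucode_request()/amdgpu_ucode_release() pairing are taken from the patch.

/* Hypothetical caller; "example" is a made-up firmware prefix. */
static int example_request_optional_fw(struct amdgpu_device *adev,
				       const struct firmware **fw)
{
	int r;

	r = amdgpu_ucode_request(adev, fw, AMDGPU_UCODE_OPTIONAL,
				 "amdgpu/%s.bin", "example");
	if (r) {
		/* Release on failure too; the helper no longer frees *fw itself. */
		amdgpu_ucode_release(fw);
		return r;
	}

	/* Use (*fw)->data / (*fw)->size; the driver releases the firmware
	 * later in its fini path with amdgpu_ucode_release(). */
	return 0;
}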
+ */ dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname); - release_firmware(*fw); - *fw = NULL; - } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4150ec0aa10d..4eedd92f000b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -126,6 +126,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_DBG_DRV, PSP_FW_TYPE_PSP_RAS_DRV, PSP_FW_TYPE_PSP_IPKEYMGR_DRV, + PSP_FW_TYPE_PSP_SPDM_DRV, PSP_FW_TYPE_MAX_INDEX, }; @@ -551,6 +552,11 @@ enum amdgpu_firmware_load_type { AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO, }; +enum amdgpu_ucode_required { + AMDGPU_UCODE_OPTIONAL, + AMDGPU_UCODE_REQUIRED, +}; + /* conform to smu_ucode_xfer_cz.h */ #define AMDGPU_SDMA0_UCODE_LOADED 0x00000001 #define AMDGPU_SDMA1_UCODE_LOADED 0x00000002 @@ -604,9 +610,9 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); -__printf(3, 4) +__printf(4, 5) int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, - const char *fmt, ...); + enum amdgpu_ucode_required required, const char *fmt, ...); void amdgpu_ucode_release(const struct firmware **fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, uint16_t hdr_major, uint16_t hdr_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 896f3609b0ee..eafe20d8fe0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -78,7 +78,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); + err_data.err_addr_cnt, false); amdgpu_ras_save_bad_pages(adev, NULL); } @@ -166,10 +166,11 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if ((amdgpu_bad_page_threshold != 0) && err_data->err_addr_cnt) { amdgpu_ras_add_bad_pages(adev, err_data->err_addr, - err_data->err_addr_cnt); + err_data->err_addr_cnt, false); amdgpu_ras_save_bad_pages(adev, &err_count); - amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, + con->eeprom_control.ras_num_bad_pages); if (con->update_channel_flag == true) { amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); @@ -444,3 +445,77 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, return ret; } + +int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t pa_addr) +{ + struct ta_ras_query_address_output addr_out; + + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + addr_out.pa.pa = pa_addr; + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL, + &addr_out, false); + else + return -EINVAL; +} + +int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len) +{ + int i, ret; + struct ras_err_data err_data; + + err_data.err_addr = kcalloc(adev->umc.retire_unit, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n"); + return 0; + 
} + + ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr); + if (ret) + goto out; + + for (i = 0; i < adev->umc.retire_unit; i++) { + if (i >= len) + goto out; + + pfns[i] = err_data.err_addr[i].retired_page; + } + ret = i; + +out: + kfree(err_data.err_addr); + return ret; +} + +int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + struct ta_ras_query_address_output *addr_out, bool dump_addr) +{ + struct ta_ras_query_address_input addr_in; + int ret; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch; + addr_in.ma.umc_inst = umc; + addr_in.ma.node_inst = node; + addr_in.ma.socket_id = socket; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { + ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in, + addr_out, dump_addr); + if (ret) + return ret; + } else { + return 0; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index ce4179db2a6d..a4a7e61817aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -54,6 +54,22 @@ /* Page retirement tag */ #define UMC_ECC_NEW_DETECTED_TAG 0x1 +/* + * a flag to indicate v2 of channel index stored in eeprom + * + * v1 (legacy way): store channel index within a umc instance in eeprom + * range in UMC v12: 0 ~ 7 + * v2: store global channel index in eeprom + * range in UMC v12: 0 ~ 127 + * + * NOTE: it's better to store it in eeprom_table_record.mem_channel, + * but there is only 8 bits in mem_channel, and the channel number may + * increase in the future, we decide to save it in + * eeprom_table_record.retired_page. retired_page is useless in v2, + * we depend on eeprom_table_record.address instead of retired_page in v2. + * Only 48 bits are saved on eeprom, use bit 47 here. 
+ */ +#define UMC_CHANNEL_IDX_V2 BIT_ULL(47) typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data); @@ -70,6 +86,13 @@ struct amdgpu_umc_ras { enum amdgpu_mca_error_type type, void *ras_error_status); int (*update_ecc_status)(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); + int (*convert_ras_err_addr)(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out, + bool dump_addr); + uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev, + uint64_t mca_addr, uint64_t retired_page); }; struct amdgpu_umc_funcs { @@ -134,4 +157,12 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, void *ras_error_status); +int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t pa_addr); +int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len); +int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + struct ta_ras_query_address_output *addr_out, bool dump_addr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index bd2d3863c3ed..dde15c6a96e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -587,7 +587,8 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) break; } - r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED, + "%s", fw_name); if (r) { release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 31fd30dcd593..74758b5ffc6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n", fw_name); @@ -551,6 +551,8 @@ static void amdgpu_uvd_force_into_uvd_segment(struct amdgpu_bo *abo) for (i = 0; i < abo->placement.num_placement; ++i) { abo->placements[i].fpfn = 0 >> PAGE_SHIFT; abo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT; + if (abo->placements[i].mem_type == TTM_PL_VRAM) + abo->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 599d3ca4e0ef..b9060bcd4806 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", fw_name); @@ -503,7 +503,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[i] = 0x0; r = amdgpu_job_submit_direct(job, ring, &f); - 
amdgpu_ib_free(ring->adev, &ib_msg, f); + amdgpu_ib_free(&ib_msg, f); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3e94c3ba1ba2..83faf6e6788a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2024 Advanced Micro Devices, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -62,6 +62,7 @@ #define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin" #define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" #define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" +#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -88,6 +89,7 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_5); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); +MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -99,11 +101,15 @@ int amdgpu_vcn_early_init(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6)) - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s_%d.bin", ucode_prefix, i); + r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_%d.bin", ucode_prefix, i); else - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s.bin", ucode_prefix); + r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (r) { - amdgpu_ucode_release(&adev->vcn.fw[i]); + amdgpu_ucode_release(&adev->vcn.inst[i].fw); return r; } } @@ -151,7 +157,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) adev->vcn.using_unified_queue = amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); - hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[0].fw->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); /* Bit 20-23, it is encode major and non-zero for new naming convention. 
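A short sketch, assuming a hypothetical helper, of how code is expected to reach the firmware image through the new per-instance pointer; adev->vcn.inst[i].fw, harvest_config and the common firmware header layout are all from the surrounding changes, the helper itself is illustrative only.

/* Hypothetical helper; reads the header via the per-instance fw pointer. */
static uint32_t example_vcn_fw_version(struct amdgpu_device *adev, int inst)
{
	const struct common_firmware_header *hdr;

	/* Skip harvested VCN instances, which carry no firmware image. */
	if (adev->vcn.harvest_config & (1 << inst))
		return 0;

	hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data;
	return le32_to_cpu(hdr->ucode_version);
}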
@@ -270,7 +276,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_enc_rings; ++i) amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); - amdgpu_ucode_release(&adev->vcn.fw[j]); + amdgpu_ucode_release(&adev->vcn.inst[j].fw); } mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround); @@ -282,7 +288,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; - int vcn_config = adev->vcn.vcn_config[vcn_instance]; + int vcn_config = adev->vcn.inst[vcn_instance].vcn_config; if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) ret = true; @@ -362,12 +368,12 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) const struct common_firmware_header *hdr; unsigned int offset; - hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(adev_to_drm(adev), &idx)) { memcpy_toio(adev->vcn.inst[i].cpu_addr, - adev->vcn.fw[i]->data + offset, + adev->vcn.inst[i].fw->data + offset, le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } @@ -580,7 +586,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, if (r) goto err_free; - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); if (fence) *fence = dma_fence_get(f); @@ -591,7 +597,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, err_free: amdgpu_job_free(job); err: - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); return r; } @@ -773,7 +779,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, if (r) goto err_free; - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); if (fence) *fence = dma_fence_get(f); @@ -784,7 +790,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, err_free: amdgpu_job_free(job); err: - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); return r; } @@ -1014,7 +1020,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; error: - amdgpu_ib_free(adev, &ib, fence); + amdgpu_ib_free(&ib, fence); dma_fence_put(fence); return r; @@ -1025,7 +1031,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; long r; - if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) { + if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) && + (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) { r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); if (r) goto error; @@ -1063,7 +1070,7 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; /* currently only support 2 FW instances */ if (i >= 2) { dev_info(adev->dev, "More then 2 VCN FW instances!\n"); @@ -1071,12 +1078,14 @@ void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) } idx = AMDGPU_UCODE_ID_VCN + i; adev->firmware.ucode[idx].ucode_id = idx; - adev->firmware.ucode[idx].fw = adev->vcn.fw[i]; + adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); if (amdgpu_ip_version(adev, UVD_HWIP, 0) == - IP_VERSION(4, 0, 3)) + 
IP_VERSION(4, 0, 3) || + amdgpu_ip_version(adev, UVD_HWIP, 0) == + IP_VERSION(5, 0, 1)) break; } } @@ -1320,3 +1329,71 @@ void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_vcn_reset_mask); } } + +/* + * debugfs to enable/disable vcn job submission to specific core or + * instance. It is created only if the queue type is unified. + */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1ULL << adev->vcn.num_vcn_inst) - 1; + if ((val & mask) == 0) + return -EINVAL; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + if (val & (1ULL << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + if (ring->sched.ready) + mask |= 1ULL << i; + } + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops, + amdgpu_debugfs_vcn_sched_mask_get, + amdgpu_debugfs_vcn_sched_mask_set, "%llx\n"); +#endif + +void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.using_unified_queue) + return; + sprintf(name, "amdgpu_vcn_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_vcn_sched_mask_fops); +#endif +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1e32311c1dff..adaf4388ad28 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -163,20 +163,30 @@ #define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \ ({ \ uint32_t internal_reg_offset, addr; \ - bool video_range, aon_range; \ + bool video_range, video1_range, aon_range, aon1_range; \ \ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ addr <<= 2; \ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \ + video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \ + aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \ if (video_range) \ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \ (VCN_VID_IP_ADDRESS)); \ else if (aon_range) \ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \ (VCN_AON_IP_ADDRESS)); \ + else if (video1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \ + (VCN_VID_IP_ADDRESS)); \ + else if (aon1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \ + (VCN_AON_IP_ADDRESS)); \ else \ internal_reg_offset = (0xFFFFF & addr); \ \ @@ -297,6 +307,9 @@ struct amdgpu_vcn_inst { atomic_t dpg_enc_submission_cnt; struct amdgpu_vcn_fw_shared fw_shared; uint8_t aid_id; + const struct firmware *fw; /* VCN firmware */ + uint8_t vcn_config; + uint32_t vcn_codec_disable_mask; }; struct amdgpu_vcn_ras { @@ -306,15 +319,12 @@ struct amdgpu_vcn_ras { struct amdgpu_vcn { unsigned fw_version; struct delayed_work idle_work; - const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; bool indirect_sram; uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; - uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES]; - uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES]; struct amdgpu_vcn_reg internal; struct mutex vcn_pg_lock; struct mutex vcn1_jpeg1_workaround; @@ -523,5 +533,6 @@ int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev); int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev); void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev); +void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index c704e9803e11..0af469ec6fcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -1263,12 +1263,10 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev, if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) return 0; - tmp = kmalloc(used_size, GFP_KERNEL); + tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL); if (!tmp) return -ENOMEM; - memcpy(tmp, &host_telemetry->body.error_count, used_size); - if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0)) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 8bf28d336807..03308261f894 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -632,13 +632,13 @@ static bool amdgpu_vkms_is_idle(void *handle) return true; } -static int amdgpu_vkms_set_clockgating_state(void *handle, +static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int amdgpu_vkms_set_powergating_state(void *handle, +static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8d9bf7a0857f..c9c48b782ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -674,12 +674,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; - if (adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader && - job->enforce_isolation) - ring->funcs->emit_cleaner_shader(ring); - - if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync) + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && + !(job->enforce_isolation && !job->vmid)) return 0; amdgpu_ring_ib_begin(ring); @@ -690,6 +686,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (need_pipe_sync) amdgpu_ring_emit_pipeline_sync(ring); + if (adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && + job->enforce_isolation) + ring->funcs->emit_cleaner_shader(ring); + if (vm_flush_needed) { trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr); amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr); @@ -1265,10 +1266,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, * next command submission. */ if (amdgpu_vm_is_bo_always_valid(vm, bo)) { - uint32_t mem_type = bo->tbo.resource->mem_type; - - if (!(bo->preferred_domains & - amdgpu_mem_type_to_domain(mem_type))) + if (bo->tbo.resource && + !(bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type))) amdgpu_vm_bo_evicted(&bo_va->base); else amdgpu_vm_bo_idle(&bo_va->base); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 110b120d7375..121ee17b522b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -236,7 +236,8 @@ int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe) int ret; amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix)); - ret = amdgpu_ucode_request(adev, &adev->vpe.fw, "amdgpu/%s.bin", fw_prefix); + ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", fw_prefix); if (ret) goto out; @@ -646,16 +647,16 @@ static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static int vpe_set_clockgating_state(void *handle, +static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int vpe_set_powergating_state(void *handle, +static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; if (!adev->pm.dpm_enabled) @@ -833,7 +834,7 @@ static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout) ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 
0 : -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index e2cb1f080e88..08d6787893b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2161,13 +2161,13 @@ static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cik_common_set_clockgating_state(void *handle, +static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int cik_common_set_powergating_state(void *handle, +static int cik_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 1da17755ad53..444563486769 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -402,13 +402,13 @@ static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cik_ih_set_clockgating_state(void *handle, +static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int cik_ih_set_powergating_state(void *handle, +static int cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ede1a028d48d..d9bd8f3f17e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -133,9 +133,11 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -696,7 +698,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1189,11 +1191,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int cik_sdma_set_clockgating_state(void *handle, +static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -1204,7 +1206,7 @@ static int cik_sdma_set_clockgating_state(void *handle, return 0; } -static int cik_sdma_set_powergating_state(void *handle, +static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index d72973bd570d..82586b76aeda 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -398,14 +398,14 @@ static int cz_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cz_ih_set_clockgating_state(void *handle, +static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { // TODO return 0; } 
-static int cz_ih_set_powergating_state(void *handle, +static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { // TODO diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5098c50d54c8..c5e3d2251b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2687,6 +2687,32 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v10_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v10_0_panic_flush, +}; + static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2734,6 +2760,7 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs); return 0; } @@ -3302,13 +3329,13 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev, return 0; } -static int dce_v10_0_set_clockgating_state(void *handle, +static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v10_0_set_powergating_state(void *handle, +static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index c5680ff4ab9f..ea42a4472bf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2800,6 +2800,32 @@ static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v11_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v11_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v11_0_panic_flush, +}; + static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2847,6 +2873,7 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; 
drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v11_0_drm_primary_plane_helper_funcs); return 0; } @@ -3434,13 +3461,13 @@ static int dce_v11_0_hpd_irq(struct amdgpu_device *adev, return 0; } -static int dce_v11_0_set_clockgating_state(void *handle, +static int dce_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v11_0_set_powergating_state(void *handle, +static int dce_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index eb7de9122d99..915804a6a1d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -2602,6 +2602,32 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v6_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_ARRAY_MODE(0x7); + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v6_0_panic_flush, +}; + static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2629,6 +2655,7 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs); return 0; } @@ -3124,13 +3151,13 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, } -static int dce_v6_0_set_clockgating_state(void *handle, +static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v6_0_set_powergating_state(void *handle, +static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 04b79ff87f75..f2edc0fece5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2613,6 +2613,31 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v8_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); +} + +static const struct drm_plane_helper_funcs 
dce_v8_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v8_0_panic_flush, +}; + static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2640,6 +2665,7 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v8_0_drm_primary_plane_helper_funcs); return 0; } @@ -3212,13 +3238,13 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, } -static int dce_v8_0_set_clockgating_state(void *handle, +static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v8_0_set_powergating_state(void *handle, +static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 24dce803a829..003522c2d902 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3673,7 +3673,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); -static int gfx_v10_0_set_powergating_state(void *handle, +static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -4036,7 +4036,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -4138,18 +4138,21 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce%s.bin", ucode_prefix, wks); if (err) goto out; @@ -4173,6 +4176,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec%s.bin", ucode_prefix, wks); if (err) goto out; @@ -4180,6 +4184,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); @@ -5952,7 +5957,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) else WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - if (adev->job_hang && !enable) + if (amdgpu_in_reset(adev) && !enable) return 0; for (i = 0; i < adev->usec_timeout; i++) { @@ -6599,17 +6604,13 @@ static void gfx_v10_0_kiq_setting(struct 
amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80); break; default: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); break; } } @@ -7457,7 +7458,7 @@ static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block) * otherwise the gfxoff disallowing will be failed to set. */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1)) - gfx_v10_0_set_powergating_state(ip_block->adev, AMD_PG_STATE_UNGATE); + gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -8345,10 +8346,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; -static int gfx_v10_0_set_powergating_state(void *handle, +static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -8383,10 +8384,10 @@ static int gfx_v10_0_set_powergating_state(void *handle, return 0; } -static int gfx_v10_0_set_clockgating_state(void *handle, +static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2ae058a224f4..fe5d7cd814f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -615,7 +615,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: if (!ring->is_mes_queue) @@ -639,6 +639,7 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char * int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_toc.bin", ucode_prefix); if (err) goto out; @@ -688,6 +689,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; @@ -705,6 +707,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", ucode_prefix); if (err) goto out; @@ -720,9 +723,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && adev->pdev->revision == 0xCE) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/gc_11_0_0_rlc_1.bin"); else err = 
amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; @@ -735,6 +740,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", ucode_prefix); if (err) goto out; @@ -3918,9 +3924,7 @@ static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) @@ -5458,10 +5462,10 @@ static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } -static int gfx_v11_0_set_powergating_state(void *handle, +static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -5494,10 +5498,10 @@ static int gfx_v11_0_set_powergating_state(void *handle, return 0; } -static int gfx_v11_0_set_clockgating_state(void *handle, +static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -6646,30 +6650,14 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, r = 0; + int r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); - WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); - - /* make sure dequeue is complete*/ - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "fail to wait on hqd deactivate\n"); + dev_err(adev->dev, "reset via MMIO failed %d\n", r); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index fe7c48f2fb2a..40c47a2dd060 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -513,7 +513,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: if (!ring->is_mes_queue) @@ -537,6 +537,7 @@ static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char * int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_toc.bin", ucode_prefix); if (err) goto out; @@ -566,6 +567,7 @@ static int gfx_v12_0_init_microcode(struct 
amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; @@ -573,6 +575,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", ucode_prefix); if (err) goto out; @@ -581,6 +584,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; @@ -593,6 +597,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", ucode_prefix); if (err) goto out; @@ -2832,9 +2837,7 @@ static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev) @@ -3864,10 +3867,10 @@ static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable) } #endif -static int gfx_v12_0_set_powergating_state(void *handle, +static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -4115,15 +4118,15 @@ static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static int gfx_v12_0_set_clockgating_state(void *handle, +static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; - switch (adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): gfx_v12_0_update_gfx_clock_gating(adev, @@ -5233,24 +5236,16 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int r, i; + int r; if (amdgpu_sriov_vf(adev)) return -EINVAL; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); - soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); - WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) - break; - udelay(1); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); + if (r) { + dev_err(adev->dev, "reset via MMIO failed %d\n", r); + return r; } - soc24_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h 
b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h index bcc9c72ccbde..f7184b2dc4e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h @@ -26,4 +26,6 @@ extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block; +int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 41f50bf380c4..f26e2cdec07a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -337,6 +337,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; @@ -345,6 +346,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; @@ -353,6 +355,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; @@ -361,6 +364,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1906,7 +1910,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; error: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; } @@ -3373,11 +3377,11 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v6_0_set_clockgating_state(void *handle, +static int gfx_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -3395,11 +3399,11 @@ static int gfx_v6_0_set_clockgating_state(void *handle, return 0; } -static int gfx_v6_0_set_powergating_state(void *handle, +static int gfx_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) gate = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 824d5913103b..84745b2453ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -934,33 +934,39 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); if (err) goto out; if 
(adev->asic_type == CHIP_KAVERI) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); if (err) goto out; } err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); out: if (err) { @@ -2324,7 +2330,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; error: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; } @@ -4846,11 +4852,11 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v7_0_set_clockgating_state(void *handle, +static int gfx_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -4869,11 +4875,11 @@ static int gfx_v7_0_set_clockgating_state(void *handle, return 0; } -static int gfx_v7_0_set_powergating_state(void *handle, +static int gfx_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) gate = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b7006c41e270..af73f85527b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -914,7 +914,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -982,13 +982,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_pfp_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); } if (err) @@ -999,13 +1002,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_me_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); } if (err) @@ -1017,13 +1023,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_ce_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); } if (err) @@ -1044,6 +1053,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) 
adev->virt.chained_ib_support = false; err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1093,13 +1103,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_mec_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); } if (err) @@ -1112,13 +1125,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) (adev->asic_type != CHIP_TOPAZ)) { if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_mec2_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); } if (!err) { @@ -1640,7 +1656,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) RREG32(sec_ded_counter_registers[i]); fail: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; @@ -4304,9 +4320,7 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32(mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32(mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32(mmRLC_CP_SCHEDULERS, tmp); + WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80); } static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -5321,7 +5335,7 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade (adev->asic_type == CHIP_POLARIS12) || (adev->asic_type == CHIP_VEGAM)) /* Send msg to SMU via Powerplay */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0); WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 
1 : 0); } @@ -5367,10 +5381,10 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, } } -static int gfx_v8_0_set_powergating_state(void *handle, +static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -5982,10 +5996,10 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static int gfx_v8_0_set_clockgating_state(void *handle, +static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0b6f09f2cc9b..4b5006dc3d34 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1243,7 +1243,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -1429,18 +1429,21 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, int err; err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; @@ -1476,6 +1479,7 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc_am4.bin", chip_name); else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && (smu_version >= 0x41e2b)) @@ -1483,9 +1487,11 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 
*/ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_kicker_rlc.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1518,9 +1524,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, - "amdgpu/%s_sjt_mec.bin", chip_name); + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sjt_mec.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); if (err) goto out; @@ -1531,9 +1539,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sjt_mec2.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); @@ -3488,9 +3498,7 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) @@ -4780,7 +4788,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) } fail: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; @@ -5232,10 +5240,10 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, }; -static int gfx_v9_0_set_powergating_state(void *handle, +static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -5277,10 +5285,10 @@ static int gfx_v9_0_set_powergating_state(void *handle, return 0; } -static int gfx_v9_0_set_clockgating_state(void *handle, +static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 3f4fd2f08163..d81449f9d822 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -412,7 +412,7 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr); if (r) { dev_err(adev->dev, "ib submit failed (%d).\n", r); - amdgpu_ib_free(adev, ib, NULL); + amdgpu_ib_free(ib, NULL); } return r; } @@ -611,16 +611,16 @@ static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev) } disp2_failed: - amdgpu_ib_free(adev, &disp_ibs[2], NULL); + amdgpu_ib_free(&disp_ibs[2], NULL); dma_fence_put(fences[2]); disp1_failed: - 
amdgpu_ib_free(adev, &disp_ibs[1], NULL); + amdgpu_ib_free(&disp_ibs[1], NULL); dma_fence_put(fences[1]); disp0_failed: - amdgpu_ib_free(adev, &disp_ibs[0], NULL); + amdgpu_ib_free(&disp_ibs[0], NULL); dma_fence_put(fences[0]); pro_end: - amdgpu_ib_free(adev, &wb_ib, NULL); + amdgpu_ib_free(&wb_ib, NULL); if (r) dev_info(adev->dev, "Init SGPRS Failed\n"); @@ -687,10 +687,10 @@ static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev) } disp_failed: - amdgpu_ib_free(adev, &disp_ib, NULL); + amdgpu_ib_free(&disp_ib, NULL); dma_fence_put(fence); pro_end: - amdgpu_ib_free(adev, &wb_ib, NULL); + amdgpu_ib_free(&wb_ib, NULL); if (r) dev_info(adev->dev, "Init VGPRS Failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index e2b3dda57030..a5bdcaf7a081 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -43,8 +43,12 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_5_0_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin"); #define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L @@ -52,10 +56,6 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); #define GOLDEN_GB_ADDR_CONFIG 0x2a114042 #define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301 -#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ -#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ -#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ - #define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */ #define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */ #define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ @@ -349,13 +349,17 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG, GOLDEN_GB_ADDR_CONFIG); - /* Golden settings applied by driver for ASIC with rev_id 0 */ - if (adev->rev_id == 0) { - WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1, - REDUCE_FIFO_DEPTH_BY_2, 2); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) { + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, SPARE, 0x1); } else { - WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, - SPARE, 0x1); + /* Golden settings applied by driver for ASIC with rev_id 0 */ + if (adev->rev_id == 0) { + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1, + REDUCE_FIFO_DEPTH_BY_2, 2); + } else { + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, + SPARE, 0x1); + } } } } @@ -499,7 +503,7 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -543,6 +547,7 @@ static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -574,8 +579,14 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, { int err; - err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, - "amdgpu/%s_mec.bin", chip_name); + if (amdgpu_sriov_vf(adev)) + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sjt_mec.bin", chip_name); + else + err = 
amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mec.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1); @@ -929,6 +940,7 @@ static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1779,9 +1791,7 @@ static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id) tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) @@ -2764,16 +2774,16 @@ static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, }; -static int gfx_v9_4_3_set_powergating_state(void *handle, +static int gfx_v9_4_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static int gfx_v9_4_3_set_clockgating_state(void *handle, +static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, num_xcc; if (amdgpu_sriov_vf(adev)) @@ -4653,7 +4663,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) num_xcc = NUM_XCC(adev->gfx.xcc_mask); - amdgpu_gfx_off_ctrl(adev, false); for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { xcc_offset = xcc_id * reg_count; for (i = 0; i < reg_count; i++) @@ -4661,7 +4670,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i], GET_INST(GC, xcc_id))); } - amdgpu_gfx_off_ctrl(adev, true); /* dump compute queue registers for all instances */ if (!adev->gfx.ip_dump_compute_queues) @@ -4670,7 +4678,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); - amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->srbm_mutex); for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { xcc_offset = xcc_id * reg_count * num_inst; @@ -4697,7 +4704,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) } soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_off_ctrl(adev, true); } static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring) @@ -4860,6 +4866,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): /* 9.4.3 removed all the GDS internal memory, * only support GWS opcode in kernel, like barrier * semaphore.etc */ @@ -4873,6 +4880,7 @@ static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): /* 
deprecated for 9.4.3, no usage at all */ adev->gds.gds_compute_max_wave_id = 0; break; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index ed8e130c7d19..5470cef7e9bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -368,7 +368,9 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 4)); + IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == + IP_VERSION(9, 5, 0)); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 697599c46240..9bedca9a79c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1088,11 +1088,11 @@ static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v10_0_set_clockgating_state(void *handle, +static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled @@ -1131,7 +1131,7 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) athub_v2_0_get_clockgating(adev, flags); } -static int gmc_v10_0_set_powergating_state(void *handle, +static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index f893ab4c14df..72751ab4c766 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -996,11 +996,11 @@ static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v11_0_set_clockgating_state(void *handle, +static int gmc_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) @@ -1018,7 +1018,7 @@ static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags) athub_v3_0_get_clockgating(adev, flags); } -static int gmc_v11_0_set_powergating_state(void *handle, +static int gmc_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index d22b027fd0bb..b749f1c3f6a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -40,7 +40,7 @@ #include "gfxhub_v12_0.h" #include "mmhub_v4_1_0.h" #include "athub_v4_1_0.h" - +#include "umc_v8_14.h" static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, @@ -581,6 +581,18 @@ static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { + case IP_VERSION(8, 14, 0): + adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = 
UMC_V8_14_UMC_INSTANCE_NUM(adev); + adev->umc.node_inst_num = 0; + adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET; + adev->umc.ras = &umc_v8_14_ras; + break; + default: + break; + } } @@ -829,6 +841,10 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_vm_manager_init(adev); + r = amdgpu_gmc_ras_sw_init(adev); + if (r) + return r; + return 0; } @@ -980,11 +996,11 @@ static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v12_0_set_clockgating_state(void *handle, +static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) @@ -1002,7 +1018,7 @@ static void gmc_v12_0_get_clockgating_state(void *handle, u64 *flags) athub_v4_1_0_get_clockgating(adev, flags); } -static int gmc_v12_0_set_powergating_state(void *handle, +static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index ca000b3d1afc..2245dda92021 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -131,7 +131,8 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58) chip_name = "si58"; - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { dev_err(adev->dev, "si_mc: Failed to load firmware \"%s_mc.bin\"\n", @@ -1094,13 +1095,13 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int gmc_v6_0_set_clockgating_state(void *handle, +static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int gmc_v6_0_set_powergating_state(void *handle, +static int gmc_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index b6016f11956e..9aac4b1101e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -157,7 +157,8 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name); amdgpu_ucode_release(&adev->gmc.fw); @@ -1317,11 +1318,11 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int gmc_v7_0_set_clockgating_state(void *handle, +static int gmc_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -1337,7 +1338,7 @@ static int gmc_v7_0_set_clockgating_state(void *handle, return 0; } -static int gmc_v7_0_set_powergating_state(void 
*handle, +static int gmc_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 12d5967ecd45..d06585207c33 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -259,7 +259,8 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name); amdgpu_ucode_release(&adev->gmc.fw); @@ -1658,10 +1659,10 @@ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev, } } -static int gmc_v8_0_set_clockgating_state(void *handle, +static int gmc_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1679,7 +1680,7 @@ static int gmc_v8_0_set_clockgating_state(void *handle, return 0; } -static int gmc_v8_0_set_powergating_state(void *handle, +static int gmc_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 50c5da3020cb..291549765c38 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -623,6 +623,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } } + if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault)) + return 1; + if (!printk_ratelimit()) return 0; @@ -645,7 +648,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, soc15_ih_clientid_name[entry->client_id]); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : ""); @@ -795,7 +799,8 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) return false; return ((vmhub == AMDGPU_MMHUB0(0) || @@ -1138,12 +1143,13 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; + uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); bool snoop = false; bool is_local; dma_resv_assert_held(bo->tbo.base.resv); - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + switch (gc_ip_version) { case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): if (is_vram) { @@ -1157,10 +1163,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* FIXME: is this still needed? 
Or does * amdgpu_ttm_tt_pde_flags already handle this? */ - if ((amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 3)) && + if (gc_ip_version == IP_VERSION(9, 4, 2) && adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { @@ -1184,6 +1187,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): /* Only local VRAM BOs or system memory on non-NUMA APUs * can be assumed to be local in their entirety. Choose * MTYPE_NC as safe fallback for all system memory BOs on @@ -1208,7 +1212,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - if (adev->rev_id) + if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id) mtype = is_local ? MTYPE_CC : MTYPE_UC; else mtype = MTYPE_UC; @@ -1218,10 +1222,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* dGPU */ if (is_local) mtype = mtype_local; - else if (is_vram) - mtype = MTYPE_NC; - else + else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram) mtype = MTYPE_UC; + else + mtype = MTYPE_NC; } break; @@ -1275,7 +1279,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, * memory can use more efficient MTYPEs. */ if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 5, 0)) return; /* Only direct-mapped memory allows us to determine the NUMA node from @@ -1540,6 +1545,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) adev->mmhub.ras = &mmhub_v1_7_ras; break; case IP_VERSION(1, 8, 0): + case IP_VERSION(1, 8, 1): adev->mmhub.ras = &mmhub_v1_8_ras; break; default: @@ -1551,7 +1557,8 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) adev->gfxhub.funcs = &gfxhub_v1_2_funcs; else adev->gfxhub.funcs = &gfxhub_v1_0_funcs; @@ -1619,7 +1626,8 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) adev->gmc.xgmi.supported = true; if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) { @@ -1792,6 +1800,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): default: adev->gmc.gart_size = 512ULL << 20; break; @@ -2070,7 +2079,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) spin_lock_init(&adev->gmc.invalidate_lock); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + amdgpu_ip_version(adev, GC_HWIP, 0) == 
IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) { gmc_v9_4_3_init_vram_info(adev); } else if (!adev->bios) { if (adev->flags & AMD_IS_APU) { @@ -2154,6 +2164,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), NUM_XCC(adev->gfx.xcc_mask)); @@ -2220,7 +2231,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_gmc_get_vbios_allocations(adev); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) { r = gmc_v9_0_init_mem_ranges(adev); if (r) return r; @@ -2250,7 +2262,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) ? + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) ? 3 : 8; @@ -2263,7 +2276,8 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) return r; if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) amdgpu_gmc_sysfs_init(adev); return 0; @@ -2274,7 +2288,8 @@ static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) amdgpu_gmc_sysfs_fini(adev); amdgpu_gmc_ras_fini(adev); @@ -2544,10 +2559,10 @@ static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v9_0_set_clockgating_state(void *handle, +static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->mmhub.funcs->set_clockgating(adev, state); @@ -2565,7 +2580,7 @@ static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags) athub_v1_0_get_clockgating(adev, flags); } -static int gmc_v9_0_set_powergating_state(void *handle, +static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 7f45e93c0397..8ac3d3282268 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -392,13 +392,13 @@ static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int iceland_ih_set_clockgating_state(void *handle, +static int iceland_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int iceland_ih_set_powergating_state(void *handle, +static int iceland_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, 
enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 38f953fd65d9..f8a485164437 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -693,10 +693,10 @@ static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev, } } -static int ih_v6_0_set_clockgating_state(void *handle, +static int ih_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v6_0_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -756,10 +756,10 @@ static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v6_0_set_powergating_state(void *handle, +static int ih_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 61381e0c3795..dd0042efceec 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -674,10 +674,10 @@ static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev, return; } -static int ih_v6_1_set_clockgating_state(void *handle, +static int ih_v6_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v6_1_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -737,10 +737,10 @@ static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v6_1_set_powergating_state(void *handle, +static int ih_v6_1_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index d2428cf5d385..8f9b15c171f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -664,10 +664,10 @@ static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev, return; } -static int ih_v7_0_set_clockgating_state(void *handle, +static int ih_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v7_0_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -727,10 +727,10 @@ static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v7_0_set_powergating_state(void *handle, +static int ih_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool 
enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index d4f72e47ae9e..aeca5c08ea2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -50,7 +50,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index 1341f0292031..df898dbb746e 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -47,7 +47,8 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 6e29b69894a5..7c9251c03815 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -35,7 +35,7 @@ static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v2_0_set_powergating_state(void *handle, +static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -154,7 +154,7 @@ static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) - jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -675,14 +675,14 @@ static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int jpeg_v2_0_set_clockgating_state(void *handle, +static int jpeg_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (enable) { - if (!jpeg_v2_0_is_idle(handle)) + if (!jpeg_v2_0_is_idle(adev)) return -EBUSY; jpeg_v2_0_enable_clock_gating(adev); } else { @@ -692,10 +692,10 @@ static int jpeg_v2_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v2_0_set_powergating_state(void *handle, +static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 9ac421486f05..11f6af2646e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -38,7 +38,7 @@ static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_5_set_irq_funcs(struct 
amdgpu_device *adev); -static int jpeg_v2_5_set_powergating_state(void *handle, +static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev); @@ -219,7 +219,7 @@ static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS)) - jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0); @@ -518,10 +518,10 @@ static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int jpeg_v2_5_set_clockgating_state(void *handle, +static int jpeg_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); int i; @@ -530,7 +530,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle, continue; if (enable) { - if (!jpeg_v2_5_is_idle(handle)) + if (!jpeg_v2_5_is_idle(adev)) return -EBUSY; jpeg_v2_5_enable_clock_gating(adev, i); } else { @@ -541,10 +541,10 @@ static int jpeg_v2_5_set_clockgating_state(void *handle, return 0; } -static int jpeg_v2_5_set_powergating_state(void *handle, +static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index e0df6800502c..4eca65ea9053 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -36,7 +36,7 @@ static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v3_0_set_powergating_state(void *handle, +static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -168,7 +168,7 @@ static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) - jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -466,14 +466,14 @@ static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v3_0_set_clockgating_state(void *handle, +static int jpeg_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; if (enable) { - if (!jpeg_v3_0_is_idle(handle)) + if (!jpeg_v3_0_is_idle(adev)) return -EBUSY; jpeg_v3_0_enable_clock_gating(adev); } else { @@ -483,10 +483,10 @@ static int jpeg_v3_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v3_0_set_powergating_state(void *handle, +static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct 
amdgpu_device *adev = ip_block->adev; int ret; if(state == adev->jpeg.cur_state) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index eca1963c33b6..0aef1f64afd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -39,7 +39,7 @@ static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev); static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_set_powergating_state(void *handle, +static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev); @@ -206,7 +206,7 @@ static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); @@ -635,14 +635,14 @@ static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v4_0_set_clockgating_state(void *handle, +static int jpeg_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; if (enable) { - if (!jpeg_v4_0_is_idle(handle)) + if (!jpeg_v4_0_is_idle(adev)) return -EBUSY; jpeg_v4_0_enable_clock_gating(adev); } else { @@ -652,10 +652,10 @@ static int jpeg_v4_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_set_powergating_state(void *handle, +static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 67b51bcbacd1..88f9771c1686 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -43,7 +43,7 @@ enum jpeg_engin_status { static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_3_set_powergating_state(void *handle, +static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring); @@ -76,7 +76,7 @@ static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; jpeg_v4_0_3_set_dec_ring_funcs(adev); jpeg_v4_0_3_set_irq_funcs(adev); @@ -321,7 +321,7 @@ static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; ring->wptr = 
0; @@ -379,7 +379,7 @@ static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } return ret; @@ -949,16 +949,16 @@ static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int jpeg_v4_0_3_set_clockgating_state(void *handle, +static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (enable) { - if (!jpeg_v4_0_3_is_idle(handle)) + if (!jpeg_v4_0_3_is_idle(adev)) return -EBUSY; jpeg_v4_0_3_enable_clock_gating(adev, i); } else { @@ -968,10 +968,10 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_3_set_powergating_state(void *handle, +static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { @@ -1231,9 +1231,95 @@ static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = { .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count, }; +static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int jpeg_v4_0_3_err_codes[] = { + 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */ + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + jpeg_v4_0_3_err_codes, + ARRAY_SIZE(jpeg_v4_0_3_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = { + .aca_bank_parser = jpeg_v4_0_3_aca_bank_parser, + .aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid, +}; + +static const struct aca_info jpeg_v4_0_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &jpeg_v4_0_3_aca_bank_ops, +}; + +static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, + &jpeg_v4_0_3_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + static struct 
amdgpu_jpeg_ras jpeg_v4_0_3_ras = { .ras_block = { .hw_ops = &jpeg_v4_0_3_ras_hw_ops, + .ras_late_init = jpeg_v4_0_3_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 1d9e3b101c3a..6b3656984957 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -48,7 +48,7 @@ static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_5_set_powergating_state(void *handle, +static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring); @@ -236,7 +236,7 @@ static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS)) - jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } return 0; @@ -660,10 +660,10 @@ static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int jpeg_v4_0_5_set_clockgating_state(void *handle, +static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; int i; @@ -672,7 +672,7 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, continue; if (enable) { - if (!jpeg_v4_0_5_is_idle(handle)) + if (!jpeg_v4_0_5_is_idle(adev)) return -EBUSY; jpeg_v4_0_5_enable_clock_gating(adev, i); @@ -684,10 +684,10 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_5_set_powergating_state(void *handle, +static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 58fb1e5fa89c..d5cf0f2799d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -31,12 +31,12 @@ #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" -#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" #include "jpeg_v5_0_0.h" static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v5_0_0_set_powergating_state(void *handle, +static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -74,7 +74,7 @@ static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) /* JPEG TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); if (r) return r; @@ -172,7 +172,7 @@ static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); 
return 0; } @@ -560,14 +560,14 @@ static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v5_0_0_set_clockgating_state(void *handle, +static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; if (enable) { - if (!jpeg_v5_0_0_is_idle(handle)) + if (!jpeg_v5_0_0_is_idle(adev)) return -EBUSY; jpeg_v5_0_0_enable_clock_gating(adev); } else { @@ -577,10 +577,10 @@ static int jpeg_v5_0_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v5_0_0_set_powergating_state(void *handle, +static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) @@ -612,7 +612,7 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, DRM_DEBUG("IH: JPEG TRAP\n"); switch (entry->src_id) { - case VCN_4_0__SRCID__JPEG_DECODE: + case VCN_5_0__SRCID__JPEG_DECODE: amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c new file mode 100644 index 000000000000..40d4c32a8c2a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -0,0 +1,708 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2014-2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_jpeg.h"
+#include "amdgpu_pm.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "jpeg_v4_0_3.h"
+#include "jpeg_v5_0_1.h"
+
+#include "vcn/vcn_5_0_0_offset.h"
+#include "vcn/vcn_5_0_0_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h"
+
+static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev);
+static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block,
+					     enum amd_powergating_state state);
+static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring);
+
+static int amdgpu_ih_srcid_jpeg[] = {
+	VCN_5_0__SRCID__JPEG_DECODE,
+	VCN_5_0__SRCID__JPEG1_DECODE,
+	VCN_5_0__SRCID__JPEG2_DECODE,
+	VCN_5_0__SRCID__JPEG3_DECODE,
+	VCN_5_0__SRCID__JPEG4_DECODE,
+	VCN_5_0__SRCID__JPEG5_DECODE,
+	VCN_5_0__SRCID__JPEG6_DECODE,
+	VCN_5_0__SRCID__JPEG7_DECODE,
+	VCN_5_0__SRCID__JPEG8_DECODE,
+	VCN_5_0__SRCID__JPEG9_DECODE,
+};
+
+static int jpeg_v5_0_1_core_reg_offset(u32 pipe)
+{
+	if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3)
+		return ((0x40 * pipe) - 0xc80);
+	else
+		return ((0x40 * pipe) - 0x440);
+}
+
+/**
+ * jpeg_v5_0_1_early_init - set function pointers
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Set ring and irq function pointers
+ */
+static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+
+	if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES)
+		return -ENOENT;
+
+	adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS;
+	jpeg_v5_0_1_set_dec_ring_funcs(adev);
+	jpeg_v5_0_1_set_irq_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_sw_init - sw init for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Load firmware and sw initialization
+ */
+static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, j, r, jpeg_inst;
+
+	for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+		/* JPEG TRAP */
+		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+				      amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_jpeg_sw_init(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		jpeg_inst = GET_INST(JPEG, i);
+
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			ring = &adev->jpeg.inst[i].ring_dec[j];
+			ring->use_doorbell = false;
+			ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id);
+			if (!amdgpu_sriov_vf(adev)) {
+				ring->doorbell_index =
+					(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+					1 + j + 11 * jpeg_inst;
+			} else {
+				if (j < 4)
+					ring->doorbell_index =
+						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+						4 + j + 32 * jpeg_inst;
+				else
+					ring->doorbell_index =
+						(adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+						8 + j + 32 * jpeg_inst;
+			}
+			sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j);
+			r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0,
+					     AMDGPU_RING_PRIO_DEFAULT, NULL);
+			if (r)
+				return r;
+
+			adev->jpeg.internal.jpeg_pitch[j] =
+				regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET;
+			adev->jpeg.inst[i].external.jpeg_pitch[j] =
+				SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0,
+						  (j ? jpeg_v5_0_1_core_reg_offset(j) : 0));
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_sw_fini - sw fini for JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * JPEG suspend and free up sw allocation
+ */
+static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r;
+
+	r = amdgpu_jpeg_suspend(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_sw_fini(adev);
+
+	return r;
+}
+
+/**
+ * jpeg_v5_0_1_hw_init - start and test JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ */
+static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	struct amdgpu_ring *ring;
+	int i, j, r, jpeg_inst;
+
+	if (amdgpu_sriov_vf(adev)) {
+		/* jpeg_v5_0_1_start_sriov(adev); */
+		for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+			for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+				ring = &adev->jpeg.inst[i].ring_dec[j];
+				ring->wptr = 0;
+				ring->wptr_old = 0;
+				jpeg_v5_0_1_dec_ring_set_wptr(ring);
+				ring->sched.ready = true;
+			}
+		}
+		return 0;
+	}
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		jpeg_inst = GET_INST(JPEG, i);
+		ring = adev->jpeg.inst[i].ring_dec;
+		if (ring->use_doorbell)
+			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+				(adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst,
+				adev->jpeg.inst[i].aid_id);
+
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			ring = &adev->jpeg.inst[i].ring_dec[j];
+			if (ring->use_doorbell)
+				WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL,
+						    (ring->pipe ? (ring->pipe - 0x15) : 0),
+						    ring->doorbell_index <<
+						    VCN_JPEG_DB_CTRL__OFFSET__SHIFT |
+						    VCN_JPEG_DB_CTRL__EN_MASK);
+			r = amdgpu_ring_test_helper(ring);
+			if (r)
+				return r;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_hw_fini - stop the hardware block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Stop the JPEG block, mark ring as not ready any more
+ */
+static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int ret = 0;
+
+	cancel_delayed_work_sync(&adev->jpeg.idle_work);
+
+	if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+		ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE);
+
+	return ret;
+}
+
+/**
+ * jpeg_v5_0_1_suspend - suspend JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * HW fini and suspend JPEG block
+ */
+static int jpeg_v5_0_1_suspend(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r;
+
+	r = jpeg_v5_0_1_hw_fini(ip_block);
+	if (r)
+		return r;
+
+	r = amdgpu_jpeg_suspend(adev);
+
+	return r;
+}
+
+/**
+ * jpeg_v5_0_1_resume - resume JPEG block
+ *
+ * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
+ *
+ * Resume firmware and hw init JPEG block
+ */
+static int jpeg_v5_0_1_resume(struct amdgpu_ip_block *ip_block)
+{
+	struct amdgpu_device *adev = ip_block->adev;
+	int r;
+
+	r = amdgpu_jpeg_resume(adev);
+	if (r)
+		return r;
+
+	r = jpeg_v5_0_1_hw_init(ip_block);
+
+	return r;
+}
+
+static int jpeg_v5_0_1_disable_antihang(struct amdgpu_device *adev, int inst_idx)
+{
+	int jpeg_inst;
+
+	jpeg_inst = GET_INST(JPEG, inst_idx);
+	/* disable anti hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+		 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+	/* keep the JPEG in static PG mode */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0,
+		 ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK);
+
+	return 0;
+}
+
+static int jpeg_v5_0_1_enable_antihang(struct amdgpu_device *adev, int inst_idx)
+{
+	int jpeg_inst;
+
+	jpeg_inst = GET_INST(JPEG, inst_idx);
+	/* enable anti hang mechanism */
+	WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS),
+		 UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
+		 ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
+
+	return 0;
+}
+
+/**
+ * jpeg_v5_0_1_start - start JPEG block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the JPEG block
+ */
+static int jpeg_v5_0_1_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	int i, j, jpeg_inst, r;
+
+	for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) {
+		jpeg_inst = GET_INST(JPEG, i);
+
+		/* disable antihang */
+		r = jpeg_v5_0_1_disable_antihang(adev, i);
+		if (r)
+			return r;
+
+		/* MJPEG global tiling registers */
+		WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG,
+			     adev->gfx.config.gb_addr_config);
+
+		/* enable JMI channel */
+		WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0,
+			 ~UVD_JMI_CNTL__SOFT_RESET_MASK);
+
+		for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) {
+			int reg_offset = (j ?
jpeg_v5_0_1_core_reg_offset(j) : 0); + u32 reg, data, mask; + + ring = &adev->jpeg.inst[i].ring_dec[j]; + + /* enable System Interrupt for JRBC */ + reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN); + if (j < AMDGPU_MAX_JPEG_RINGS_4_0_3) { + data = JPEG_SYS_INT_EN__DJRBC0_MASK << j; + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j); + WREG32_P(reg, data, mask); + } else { + data = JPEG_SYS_INT_EN__DJRBC0_MASK << (j+12); + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (j+12)); + WREG32_P(reg, data, mask); + } + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_VMID, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_CNTL, + reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_RPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_WPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_CNTL, + reg_offset, 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_SIZE, + reg_offset, ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR, + reg_offset); + } + } + + return 0; +} + +/** + * jpeg_v5_0_1_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v5_0_1_stop(struct amdgpu_device *adev) +{ + int i, jpeg_inst, r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable antihang */ + r = jpeg_v5_0_1_enable_antihang(adev, i); + if (r) + return r; + } + + return 0; +} + +/** + * jpeg_v5_0_1_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v5_0_1_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR, + ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0); +} + +/** + * jpeg_v5_0_1_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v5_0_1_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR, + ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0); +} + +/** + * jpeg_v5_0_1_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), + regUVD_JRBC_RB_WPTR, + (ring->pipe ? 
jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0), + lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v5_0_1_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool ret = false; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0); + + ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i), + regUVD_JRBC_STATUS, reg_offset) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } + } + + return ret; +} + +static int jpeg_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 0; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0); + + ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i), + regUVD_JRBC_STATUS, reg_offset, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } + } + return ret; +} + +static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = (state == AMD_CG_STATE_GATE) ? true : false; + + int i; + + if (!enable) + return 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (!jpeg_v5_0_1_is_idle(adev)) + return -EBUSY; + } + + return 0; +} + +static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v5_0_1_stop(adev); + else + ret = jpeg_v5_0_1_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u32 i, inst; + + i = node_id_to_phys_map[entry->node_id]; + DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) + if (adev->jpeg.inst[inst].aid_id == i) + break; + + if (inst >= adev->jpeg.num_jpeg_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown JPEG instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_5_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]); + break; + case VCN_5_0__SRCID__JPEG1_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]); + break; + case VCN_5_0__SRCID__JPEG2_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]); + break; + case VCN_5_0__SRCID__JPEG3_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]); + break; + case VCN_5_0__SRCID__JPEG4_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]); + break; + case VCN_5_0__SRCID__JPEG5_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]); + break; + case VCN_5_0__SRCID__JPEG6_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]); + break; + case VCN_5_0__SRCID__JPEG7_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]); + break; + case VCN_5_0__SRCID__JPEG8_DECODE: + 
amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]); + break; + case VCN_5_0__SRCID__JPEG9_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { + .name = "jpeg_v5_0_1", + .early_init = jpeg_v5_0_1_early_init, + .late_init = NULL, + .sw_init = jpeg_v5_0_1_sw_init, + .sw_fini = jpeg_v5_0_1_sw_fini, + .hw_init = jpeg_v5_0_1_hw_init, + .hw_fini = jpeg_v5_0_1_hw_fini, + .suspend = jpeg_v5_0_1_suspend, + .resume = jpeg_v5_0_1_resume, + .is_idle = jpeg_v5_0_1_is_idle, + .wait_for_idle = jpeg_v5_0_1_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v5_0_1_set_clockgating_state, + .set_powergating_state = jpeg_v5_0_1_set_powergating_state, + .dump_ip_state = NULL, + .print_ip_state = NULL, +}; + +static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, + .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, + .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v5_0_1_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v5_0_1_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v5_0_1_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i, j, jpeg_inst; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_1_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec[j].me = i; + adev->jpeg.inst[i].ring_dec[j].pipe = j; + } + jpeg_inst = GET_INST(JPEG, i); + adev->jpeg.inst[i].aid_id = + jpeg_inst / adev->jpeg.num_inst_per_aid; + } +} + +static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = { + .set = jpeg_v5_0_1_set_interrupt_state, + .process = jpeg_v5_0_1_process_interrupt, +}; + +static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) + adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; + + adev->jpeg.inst->irq.funcs = &jpeg_v5_0_1_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 5, + .minor = 0, + .rev = 1, + .funcs = &jpeg_v5_0_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h new file mode 100644 index 000000000000..8ce146c00bb6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h @@ -0,0 +1,29 @@ +/* + * 
Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __JPEG_V5_0_1_H__ +#define __JPEG_V5_0_1_H__ + +extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block; + +#endif /* __JPEG_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 9c905b9e9376..65f389eb65e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1505,9 +1505,7 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 9ecc5d61e49b..5b537806b4da 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include <linux/module.h> #include "amdgpu.h" +#include "gfx_v12_0.h" #include "soc15_common.h" #include "soc21.h" #include "gc/gc_12_0_0_offset.h" @@ -350,6 +351,132 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) +{ + u32 i, tmp, val; + + for (i = 0; i < adev->usec_timeout; i++) { + /* Request with MeId=2, PipeId=0 */ + tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); + WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); + + val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); + if (req) { + if (val == tmp) + break; + } else { + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, + REQUEST, 1); + + /* unlocked or locked by firmware */ + if (val != tmp) + break; + } + udelay(1); + } + + if (i >= adev->usec_timeout) + return -EINVAL; + + return 0; +} + +static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, + uint32_t queue_id, uint32_t vmid) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t value, reg; + int i, r = 0; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + if (queue_type == AMDGPU_RING_TYPE_GFX) { + 
dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n", + me_id, pipe_id, queue_id, vmid); + + mutex_lock(&adev->gfx.reset_sem_mutex); + gfx_v12_0_request_gfx_index_mutex(adev, true); + /* all se allow writes */ + WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, + (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (pipe_id == 0) + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id); + else + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id); + WREG32_SOC15(GC, 0, regCP_VMID_RESET, value); + gfx_v12_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); + + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n"); + r = -ETIMEDOUT; + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on hqd deactivate\n"); + r = -ETIMEDOUT; + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { + dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + switch (me_id) { + case 1: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ); + break; + case 0: + default: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ); + break; + } + + value = 1 << queue_id; + WREG32(reg, value); + /* wait for queue reset done */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(reg) & value)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on sdma queue reset done\n"); + r = -ETIMEDOUT; + } + } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, struct mes_reset_queue_input *input) { @@ -721,6 +848,11 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, union MESAPI__RESET mes_reset_queue_pkt; int pipe; + if (input->use_mmio) + return mes_v12_0_reset_queue_mmio(mes, input->queue_type, + input->me_id, input->pipe_id, + input->queue_id, input->vmid); + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; @@ -1455,9 +1587,7 @@ static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) diff --git 
a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index e9a6f33ca710..243eabda0607 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -356,7 +356,7 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, - enable); + enable, 0); } static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index b01bb759d0f4..e646e5cef0a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -33,7 +33,6 @@ #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 -#define mmSMNAID_AID0_MCA_SMU 0x03b30400 static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 0fbc3be81f14..f2ab5001b492 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -108,7 +108,7 @@ mmhub_v4_1_0_print_l2_protection_fault_status(struct amdgpu_device *adev, dev_err(adev->dev, "MMVM_L2_PROTECTION_FAULT_STATUS_LO32:0x%08X\n", status); - switch (adev->ip_versions[MMHUB_HWIP][0]) { + switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(4, 1, 0): mmhub_cid = mmhub_client_ids_v4_1_0[cid][rw]; break; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 0820ed62e2e8..62cdfe10e6f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -434,9 +434,8 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catch up. 
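 * For instance, with a 256 KiB IH ring (ptr_mask 0x3FFFF) and 32-byte IV entries,
 * a wrapped wptr of 0x3FFF0 gives rptr = (0x3FFF0 + 32) & 0x3FFFF = 0x00010,
 * i.e. the read pointer is moved just past the newest, partially overwritten entry.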
*/ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -667,17 +666,17 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int navi10_ih_set_clockgating_state(void *handle, +static int navi10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; navi10_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); return 0; } -static int navi10_ih_set_powergating_state(void *handle, +static int navi10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c index 39919e0892c1..c92875ceb31f 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -28,6 +28,7 @@ #include "nbif/nbif_6_3_1_sh_mask.h" #include "pcie/pcie_6_1_0_offset.h" #include "pcie/pcie_6_1_0_sh_mask.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) @@ -518,3 +519,83 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = { .get_rom_offset = nbif_v6_3_1_get_rom_offset, .set_reg_remap = nbif_v6_3_1_set_reg_remap, }; + +static int nbif_v6_3_1_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) +{ + /* The ras_controller_irq enablement should be done in psp bl when it + * tries to enable ras feature. Driver only need to set the correct interrupt + * vector for bare-metal and sriov use case respectively + */ + uint32_t bif_doorbell_int_cntl; + + bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE, + (state == AMDGPU_IRQ_STATE_ENABLE) ? 0 : 1); + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl); + + return 0; +} + +static int nbif_v6_3_1_process_err_event_athub_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* By design, the ih cookie for err_event_athub_irq should be written + * to bif ring. since bif ring is not enabled, just leave process callback + * as a dummy one. 
+ */ + return 0; +} + +static const struct amdgpu_irq_src_funcs nbif_v6_3_1_ras_err_event_athub_irq_funcs = { + .set = nbif_v6_3_1_set_ras_err_event_athub_irq_state, + .process = nbif_v6_3_1_process_err_event_athub_irq, +}; + +static void nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) +{ + uint32_t bif_doorbell_int_cntl; + + bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl); + amdgpu_ras_global_ras_isr(adev); + } +} + +static int nbif_v6_3_1_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev) +{ + int r; + + /* init the irq funcs */ + adev->nbio.ras_err_event_athub_irq.funcs = + &nbif_v6_3_1_ras_err_event_athub_irq_funcs; + adev->nbio.ras_err_event_athub_irq.num_types = 1; + + /* register ras err event athub interrupt + * nbif v6_3_1 uses the same irq source as nbio v7_4 + */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, + &adev->nbio.ras_err_event_athub_irq); + + return r; +} + +struct amdgpu_nbio_ras nbif_v6_3_1_ras = { + .handle_ras_err_event_athub_intr_no_bifring = + nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring, + .init_ras_err_event_athub_interrupt = + nbif_v6_3_1_init_ras_err_event_athub_interrupt, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h index b7f2e0d88905..9ac4831d39e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h @@ -29,5 +29,6 @@ extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs; extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs; +extern struct amdgpu_nbio_ras nbif_v6_3_1_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index b1b57dcc5a73..d1032e9992b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -271,8 +271,19 @@ const struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; +#define regRCC_DEV0_EPF6_STRAP4 0xd304 +#define regRCC_DEV0_EPF6_STRAP4_BASE_IDX 5 + static void nbio_v7_0_init_registers(struct amdgpu_device *adev) { + uint32_t data; + + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { + case IP_VERSION(2, 5, 0): + data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4) & ~BIT(23); + WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF6_STRAP4, data); + break; + } } #define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 814ab59fdd4a..41421da63a08 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -275,7 +275,7 @@ static void nbio_v7_11_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_SOC15(NBIO, 0, regBIF_BIF256_CI256_RC3X4_USB4_PCIE_MST_CTRL_3, data); - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 11, 0): case IP_VERSION(7, 11, 1): case IP_VERSION(7, 11, 
2): diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index 1ac730328516..3fb6d2aa7e3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -247,7 +247,7 @@ static void nbio_v7_7_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_SOC15(NBIO, 0, regBIF0_PCIE_MST_CTRL_3, data); - switch (adev->ip_versions[NBIO_HWIP][0]) { + switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(7, 7, 0): data = RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4) & ~BIT(23); WREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF5_STRAP4, data); diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 3bad565ded73..47db483c3516 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -1039,10 +1039,10 @@ static bool nv_common_is_idle(void *handle) return true; } -static int nv_common_set_clockgating_state(void *handle, +static int nv_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1070,7 +1070,7 @@ static int nv_common_set_clockgating_state(void *handle, return 0; } -static int nv_common_set_powergating_state(void *handle, +static int nv_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* TODO */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index c4b775aaee9f..cc621064610f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -51,6 +51,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_12_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_12_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin"); @@ -122,6 +124,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): err = psp_init_sos_microcode(psp, ucode_prefix); if (err) @@ -177,6 +180,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) retry_cnt = ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ? 
PSP_VMBX_POLLING_LIMIT : 10; @@ -203,6 +207,7 @@ static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp) int ret; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) { ret = psp_v13_0_wait_for_vmbx_ready(psp); if (ret) @@ -288,6 +293,11 @@ static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp) return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); } +static int psp_v13_0_bootloader_load_spdm_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->spdm_drv, PSP_BL__LOAD_SPDMDRV); +} + static inline void psp_v13_0_init_sos_version(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -798,6 +808,7 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp) return false; if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) && (!(adev->flags & AMD_IS_APU))) { reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127); @@ -857,6 +868,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv, + .bootloader_load_spdm_drv = psp_v13_0_bootloader_load_spdm_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, .ring_create = psp_v13_0_ring_create, .ring_stop = psp_v13_0_ring_stop, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 7948d74f8722..135c5099bfb8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -145,9 +145,11 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -631,7 +633,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1080,14 +1082,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v2_4_set_clockgating_state(void *handle, +static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* XXX handled via the smc on VI */ return 0; } -static int sdma_v2_4_set_powergating_state(void *handle, +static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 9a3d729545a7..c611328671ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -305,9 +305,11 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", 
chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -904,7 +906,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1483,10 +1485,10 @@ static void sdma_v3_0_update_sdma_medium_grain_light_sleep( } } -static int sdma_v3_0_set_clockgating_state(void *handle, +static int sdma_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1506,7 +1508,7 @@ static int sdma_v3_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v3_0_set_powergating_state(void *handle, +static int sdma_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index c1f98f6cf20d..b48d9c0b2e1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1565,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1956,7 +1956,7 @@ static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0); if (!amdgpu_sriov_vf(adev)) sdma_v4_0_init_golden_registers(adev); @@ -1983,7 +1983,7 @@ static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v4_0_enable(adev, false); if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0); return 0; } @@ -2297,10 +2297,10 @@ static void sdma_v4_0_update_medium_grain_light_sleep( } } -static int sdma_v4_0_set_clockgating_state(void *handle, +static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -2312,10 +2312,10 @@ static int sdma_v4_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v4_0_set_powergating_state(void *handle, +static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 1, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index a38553f38fdc..56507ae919b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -189,6 +189,7 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) 
== IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) { ret = amdgpu_sdma_init_microcode(adev, 0, true); break; @@ -667,11 +668,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) * * @adev: amdgpu_device pointer * @i: instance to resume + * @restore: used to restore wptr when restart * * Set up the gfx DMA ring buffers and enable them. * Returns 0 for success, error for failure. */ -static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) +static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -698,16 +700,24 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8); WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40); - ring->wptr = 0; + if (!restore) + ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + if (restore) { + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(ring->wptr << 2)); + } else { + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + } doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); @@ -755,11 +765,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) * * @adev: amdgpu_device pointer * @i: instance to resume + * @restore: boolean to say restore needed or not * * Set up the page DMA ring buffers and enable them. * Returns 0 for success, error for failure. 
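 * A minimal sketch of the restore path, assuming ring->wptr counts dwords while
 * the RB pointer registers take byte offsets (hence the shift by two and the
 * lo/hi split):
 *   u64 off = ring->wptr << 2;
 *   WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(off));
 *   WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(off));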
*/ -static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) +static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].page; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -775,10 +786,17 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl); /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0); + if (restore) { + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(ring->wptr << 2)); + } else { + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0); + } /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI, @@ -792,7 +810,8 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8); WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40); - ring->wptr = 0; + if (!restore) + ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1); @@ -911,12 +930,13 @@ static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @inst_mask: mask of dma engine instances to be enabled + * @restore: boolean to say restore needed or not * * Set up the DMA engines and enable them. * Returns 0 for success, error for failure. 
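 * inst_mask selects engines by bit, so BIT(n) picks instance n; for example, the
 * per-queue reset path below restarts only the affected engine with
 *   sdma_v4_4_2_inst_start(adev, 1 << ring->me, true);
 * which skips microcode reload and keeps the saved write pointer.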
*/ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, - uint32_t inst_mask) + uint32_t inst_mask, bool restore) { struct amdgpu_ring *ring; uint32_t tmp_mask; @@ -927,7 +947,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, sdma_v4_4_2_inst_enable(adev, false, inst_mask); } else { /* bypass sdma microcode loading on Gopher */ - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && + if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && adev->sdma.instance[0].fw) { r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask); if (r) @@ -946,17 +966,19 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, uint32_t temp; WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); - sdma_v4_4_2_gfx_resume(adev, i); + sdma_v4_4_2_gfx_resume(adev, i, restore); if (adev->sdma.has_page_queue) - sdma_v4_4_2_page_resume(adev, i); + sdma_v4_4_2_page_resume(adev, i, restore); /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); - /* enable context empty interrupt during initialization */ - temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); - WREG32_SDMA(i, regSDMA_CNTL, temp); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { + /* enable context empty interrupt during initialization */ + temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); + } if (!amdgpu_sriov_vf(adev)) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* unhalt engine */ @@ -1110,7 +1132,7 @@ static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1466,6 +1488,7 @@ static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_sdma_sysfs_reset_mask_fini(adev); if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) amdgpu_sdma_destroy_inst_ctx(adev, true); else @@ -1486,7 +1509,7 @@ static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); - r = sdma_v4_4_2_inst_start(adev, inst_mask); + r = sdma_v4_4_2_inst_start(adev, inst_mask, false); return r; } @@ -1514,7 +1537,7 @@ static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block) return 0; } -static int sdma_v4_4_2_set_clockgating_state(void *handle, +static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block) @@ -1522,7 +1545,7 @@ static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (amdgpu_in_reset(adev)) - sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); return sdma_v4_4_2_hw_fini(ip_block); } @@ -1573,6 +1596,42 @@ static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } +static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, r; + u32 inst_mask; + + if ((adev->flags & AMD_IS_APU) || amdgpu_sriov_vf(adev)) + return -EINVAL; + + /* stop queue */ + inst_mask = 1 << ring->me; + sdma_v4_4_2_inst_gfx_stop(adev, 
inst_mask); + if (adev->sdma.has_page_queue) + sdma_v4_4_2_inst_page_stop(adev, inst_mask); + + r = amdgpu_dpm_reset_sdma(adev, 1 << GET_INST(SDMA0, ring->me)); + if (r) + return r; + + udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT)) + break; + udelay(1); + } + + if (i == adev->usec_timeout) { + dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n", + ring->me); + return -ETIMEDOUT; + } + + return sdma_v4_4_2_inst_start(adev, inst_mask, true); +} + static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -1821,10 +1880,10 @@ static void sdma_v4_4_2_inst_update_medium_grain_clock_gating( } } -static int sdma_v4_4_2_set_clockgating_state(void *handle, +static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t inst_mask; if (amdgpu_sriov_vf(adev)) @@ -1839,7 +1898,7 @@ static int sdma_v4_4_2_set_clockgating_state(void *handle, return 0; } -static int sdma_v4_4_2_set_powergating_state(void *handle, +static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -1895,7 +1954,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block) if (!adev->sdma.ip_dump) return; - amdgpu_gfx_off_ctrl(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { instance_offset = i * reg_count; for (j = 0; j < reg_count; j++) @@ -1903,7 +1961,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block) RREG32(sdma_v4_4_2_get_reg_offset(adev, i, sdma_reg_list_4_4_2[j].reg_offset)); } - amdgpu_gfx_off_ctrl(adev, true); } const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { @@ -1955,6 +2012,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { .emit_wreg = sdma_v4_4_2_ring_emit_wreg, .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = sdma_v4_4_2_reset_queue, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { @@ -2167,7 +2225,7 @@ static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask) if (!amdgpu_sriov_vf(adev)) sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); - r = sdma_v4_4_2_inst_start(adev, inst_mask); + r = sdma_v4_4_2_inst_start(adev, inst_mask, false); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index fa9b40934957..b764550834a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1194,7 +1194,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1853,10 +1853,10 @@ static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev } } -static int sdma_v5_0_set_clockgating_state(void *handle, +static int sdma_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1877,7 +1877,7 @@ static int sdma_v5_0_set_clockgating_state(void *handle, return 0; } -static int 
sdma_v5_0_set_powergating_state(void *handle, +static int sdma_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index ba5160399ab2..b1818e87889a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1050,7 +1050,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1812,10 +1812,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev } } -static int sdma_v5_2_set_clockgating_state(void *handle, +static int sdma_v5_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1841,7 +1841,7 @@ static int sdma_v5_2_set_clockgating_state(void *handle, return 0; } -static int sdma_v5_2_set_powergating_state(void *handle, +static int sdma_v5_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index d46128b0ec92..1a023b45f0be 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -1063,7 +1063,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1601,13 +1601,13 @@ static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v6_0_set_clockgating_state(void *handle, +static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int sdma_v6_0_set_powergating_state(void *handle, +static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index d2ce6b6a7ff6..9c17df2cf37b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -490,162 +490,185 @@ static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable) } /** - * sdma_v7_0_gfx_resume - setup and start the async dma engines + * sdma_v7_0_gfx_resume_instance - start/restart a certain sdma engine * * @adev: amdgpu_device pointer + * @i: instance + * @restore: used to restore wptr when restart * - * Set up the gfx DMA ring buffers and enable them. - * Returns 0 for success, error for failure. + * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr. + * Return 0 for success. 
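+ * The ring size is programmed as a log2 dword count, rb_bufsz = order_base_2(ring->ring_size / 4);
+ * for instance, a 256 KiB ring (65536 dwords) yields an RB_SIZE field of 16.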
*/ -static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev) +static int sdma_v7_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore) { struct amdgpu_ring *ring; u32 rb_cntl, ib_cntl; u32 rb_bufsz; u32 doorbell; u32 doorbell_offset; - u32 tmp; + u32 temp; u64 wptr_gpu_addr; - int i, r; - - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; + int r; - //if (!amdgpu_sriov_vf(adev)) - // WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + ring = &adev->sdma.instance[i].ring; - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + if (restore) { + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } else { WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0); WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0); WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0); WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0); + } + /* setup the wptr shadow polling */ + wptr_gpu_addr = ring->wptr_gpu_addr; + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + if (amdgpu_sriov_vf(adev)) + rb_cntl = 
REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); + else + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); - /* setup the wptr shadow polling */ - wptr_gpu_addr = ring->wptr_gpu_addr; - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - - /* set the wb address whether it's enabled or not */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - if (amdgpu_sriov_vf(adev)) - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); - else - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + if (!restore) ring->wptr = 0; - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1); + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1); - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - } + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } - doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL)); - doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET)); + doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET)); - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET, - OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0); - } - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell); - WREG32_SOC15_IP(GC, 
sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - if (i == 0) - adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); - - if (amdgpu_sriov_vf(adev)) - sdma_v7_0_ring_set_wptr(ring); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); - - /* Set up sdma hang watchdog */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); - /* 100ms per unit */ - tmp = REG_SET_FIELD(tmp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, - max(adev->usec_timeout/100000, 1)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), tmp); - - /* Set up RESP_MODE to non-copy addresses */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); - tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); - tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), tmp); - - /* program default cache read and write policy */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE)); - /* clean read policy and write policy bits */ - tmp &= 0xFF0FFF; - tmp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | - (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), tmp); - - if (!amdgpu_sriov_vf(adev)) { - /* unhalt engine */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL)); - tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, HALT, 0); - tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, RESET, 0); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp); - } + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0); + } + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + if (i == 0) + adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); - ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1); + if (amdgpu_sriov_vf(adev)) + sdma_v7_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); + + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32_SOC15_IP(GC, 
sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | + (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, HALT, 0); + temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, RESET, 0); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), temp); + } + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); #endif - /* enable DMA IBs */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); + /* enable DMA IBs */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); + ring->sched.ready = true; - ring->sched.ready = true; + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v7_0_ctx_switch_enable(adev, true); + sdma_v7_0_enable(adev, true); + } - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v7_0_ctx_switch_enable(adev, true); - sdma_v7_0_enable(adev, true); - } + r = amdgpu_ring_test_helper(ring); + if (r) + ring->sched.ready = false; - r = amdgpu_ring_test_helper(ring); - if (r) { - ring->sched.ready = false; - return r; - } + return r; +} + +/** + * sdma_v7_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them. + * Returns 0 for success, error for failure. 
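+ * The per-queue reset path further down reuses the instance helper directly, e.g.
+ * sdma_v7_0_gfx_resume_instance(adev, i, true) after the queue has been reset through
+ * MES, so the saved write pointer is restored instead of being zeroed.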
+ */ +static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev) +{ + int i, r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = sdma_v7_0_gfx_resume_instance(adev, i, false); + if (r) + return r; } return 0; + } /** @@ -806,6 +829,31 @@ static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block) return false; } +static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring) + break; + } + + if (i == adev->sdma.num_instances) { + DRM_ERROR("sdma instance not found\n"); + return -EINVAL; + } + + r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); + if (r) + return r; + + return sdma_v7_0_gfx_resume_instance(adev, i, true); +} + /** * sdma_v7_0_start - setup and start the async dma engines * @@ -1060,7 +1108,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1316,6 +1364,13 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } + adev->sdma.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + r = amdgpu_sdma_sysfs_reset_mask_init(adev); + if (r) + return r; /* Allocate memory for SDMA IP Dump buffer */ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); if (ptr) @@ -1334,6 +1389,7 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + amdgpu_sdma_sysfs_reset_mask_fini(adev); amdgpu_sdma_destroy_inst_ctx(adev, true); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) @@ -1524,13 +1580,13 @@ static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v7_0_set_clockgating_state(void *handle, +static int sdma_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int sdma_v7_0_set_powergating_state(void *handle, +static int sdma_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -1636,6 +1692,7 @@ static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { .emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v7_0_ring_init_cond_exec, .preempt_ib = sdma_v7_0_ring_preempt_ib, + .reset = sdma_v7_0_reset_queue, }; static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 00f63d3fbea7..77ef7da2e4fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2649,13 +2649,13 @@ static bool si_common_is_idle(void *handle) return true; } -static int si_common_set_clockgating_state(void *handle, +static int si_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int si_common_set_powergating_state(void *handle, +static int si_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 47647a6083e8..dbd78d5345a4 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -286,7 +286,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -629,13 +629,13 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev, return 0; } -static int si_dma_set_clockgating_state(void *handle, +static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { u32 orig, data, offset; int i; bool enable; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; enable = (state == AMD_CG_STATE_GATE); @@ -672,12 +672,12 @@ static int si_dma_set_clockgating_state(void *handle, return 0; } -static int si_dma_set_powergating_state(void *handle, +static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; WREG32(DMA_PGFSM_WRITE, 0x00002000); WREG32(DMA_PGFSM_CONFIG, 0x100010ff); diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 2ec1ebe4db11..a32b6243c1f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -263,13 +263,13 @@ static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int si_ih_set_clockgating_state(void *handle, +static int si_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int si_ih_set_powergating_state(void *handle, +static int si_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ede072758dab..a59b4c36cad7 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -171,6 +171,24 @@ static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = { .codec_array = NULL, }; +static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_encode_vcn0 = { + .codec_count = 0, + .codec_array = NULL, +}; + +static const struct amdgpu_video_codec_info vcn_5_0_1_video_codecs_decode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_decode_vcn0 = { + .codec_count = ARRAY_SIZE(vcn_5_0_1_video_codecs_decode_array_vcn0), + .codec_array = vcn_5_0_1_video_codecs_decode_array_vcn0, +}; + static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -209,6 +227,12 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, else *codecs = &vcn_4_0_3_video_codecs_decode; return 0; + case IP_VERSION(5, 0, 1): + if (encode) + *codecs = &vcn_5_0_1_video_codecs_encode_vcn0; + else + *codecs = &vcn_5_0_1_video_codecs_decode_vcn0; + return 0; default: return -EINVAL; } @@ -327,6 +351,7 @@ static u32 soc15_get_xclk(struct 
amdgpu_device *adev) if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14)) return 10000; if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) || @@ -556,6 +581,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) break; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 12): /* Use gpu_recovery param to target a reset method. * Enable triggering of GPU reset only if specified * by module parameter. @@ -1177,6 +1203,7 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): adev->asic_funcs = &aqua_vanjaram_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | @@ -1385,10 +1412,10 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data); } -static int soc15_common_set_clockgating_state(void *handle, +static int soc15_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1453,6 +1480,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12)) && (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); @@ -1473,7 +1501,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) adev->df.funcs->get_clockgating_state(adev, flags); } -static int soc15_common_set_powergating_state(void *handle, +static int soc15_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* todo */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d6999835918f..62ad67d0b598 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -928,10 +928,10 @@ static bool soc21_common_is_idle(void *handle) return true; } -static int soc21_common_set_clockgating_state(void *handle, +static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(4, 3, 0): @@ -954,10 +954,10 @@ static int soc21_common_set_clockgating_state(void *handle, return 0; } -static int soc21_common_set_powergating_state(void *handle, +static int soc21_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { case IP_VERSION(6, 0, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index be96de92b2f5..6b8e078ee7c7 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -444,8 +444,18 @@ static int soc24_common_late_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_get_irq(adev); + } else { + if (adev->nbio.ras && + adev->nbio.ras_err_event_athub_irq.funcs) + /* don't need to fail gpu late init + * if enabling athub_err_event interrupt failed + * nbif v6_3_1 only support fatal error hanlding + * just enable the interrupt directly + */ + amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } /* Enable selfring doorbell aperture late because doorbell BAR * aperture will change if resize BAR successfully in gmc sw_init. @@ -501,8 +511,13 @@ static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block) adev->nbio.funcs->enable_doorbell_aperture(adev, false); adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_put_irq(adev); + } else { + if (adev->nbio.ras && + adev->nbio.ras_err_event_athub_irq.funcs) + amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } return 0; } @@ -522,10 +537,10 @@ static bool soc24_common_is_idle(void *handle) return true; } -static int soc24_common_set_clockgating_state(void *handle, +static int soc24_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 3, 1): @@ -542,10 +557,10 @@ static int soc24_common_set_clockgating_state(void *handle, return 0; } -static int soc24_common_set_powergating_state(void *handle, +static int soc24_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { case IP_VERSION(7, 0, 0): diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 21b71a427b1f..64891f099366 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -30,6 +30,9 @@ #define RSP_ID_MASK (1U << 31) #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) +/* invalid node instance value */ +#define TA_RAS_INV_NODE 0xffff + /* RAS related enumerations */ /**********************************************************/ enum ras_command { diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h index 00d8bdb8254f..9ec2e03d41c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h @@ -31,10 +31,12 @@ * Secure Display Command ID */ enum ta_securedisplay_command { - /* Query whether TA is responding used only for validation purpose */ + /* Query whether TA is responding. 
It is used only for validation purpose */ TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1, /* Send region of Interest and CRC value to I2C */ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2, + /* V2 to send multiple regions of Interest and CRC value to I2C */ + TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2 = 3, /* Maximum Command ID */ TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF, }; @@ -83,6 +85,8 @@ enum ta_securedisplay_ta_query_cmd_ret { enum ta_securedisplay_buffer_size { /* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */ TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15, + /* 16 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) + 1 byte(roi_idx) */ + TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE = 16, }; /** Input/output structures for Secure Display commands */ @@ -95,7 +99,15 @@ enum ta_securedisplay_buffer_size { * Physical ID to determine which DIO scratch register should be used to get ROI */ struct ta_securedisplay_send_roi_crc_input { - uint32_t phy_id; /* Physical ID */ + /* Physical ID */ + uint32_t phy_id; +}; + +struct ta_securedisplay_send_roi_crc_v2_input { + /* Physical ID */ + uint32_t phy_id; + /* Region of interest index */ + uint8_t roi_idx; }; /** @union ta_securedisplay_cmd_input @@ -104,6 +116,8 @@ struct ta_securedisplay_send_roi_crc_input { union ta_securedisplay_cmd_input { /* send ROI and CRC input buffer format */ struct ta_securedisplay_send_roi_crc_input send_roi_crc; + /* send ROI and CRC input buffer format, v2 adds a ROI index */ + struct ta_securedisplay_send_roi_crc_v2_input send_roi_crc_v2; uint32_t reserved[4]; }; @@ -128,6 +142,10 @@ struct ta_securedisplay_send_roi_crc_output { uint8_t reserved; }; +struct ta_securedisplay_send_roi_crc_v2_output { + uint8_t i2c_buf[TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE]; /* I2C buffer */ +}; + /** @union ta_securedisplay_cmd_output * Output buffer */ @@ -136,6 +154,8 @@ union ta_securedisplay_cmd_output { struct ta_securedisplay_query_ta_output query_ta; /* Send ROI CRC output buffer format used only for validation purpose */ struct ta_securedisplay_send_roi_crc_output send_roi_crc; + /* Send ROI CRC output buffer format used only for validation purpose */ + struct ta_securedisplay_send_roi_crc_v2_output send_roi_crc_v2; uint32_t reserved[4]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 5a04a6770138..0968e551f7b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -448,13 +448,13 @@ static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int tonga_ih_set_clockgating_state(void *handle, +static int tonga_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int tonga_ih_set_powergating_state(void *handle, +static int tonga_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 1a8ea834efa6..a7b9c358a2d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ -173,156 +173,96 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } -static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, +static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - struct ta_ras_query_address_input *addr_in) + struct ta_ras_query_address_input *addr_in, + struct 
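Aside: the ta_secureDisplay_if.h changes above add a V2 ROI/CRC command that carries a region-of-interest index alongside the physical id, with a 16-byte I2C output buffer. A minimal sketch of how a caller might fill the new input union is below; the command-buffer type and how it is submitted to the TA are assumptions of this sketch, only the field names and command id come from the header.

/* Illustrative only: populate the V2 ROI/CRC request defined above.
 * The struct ta_securedisplay_cmd command buffer and its submission path
 * are assumed here and not part of the shown diff.
 */
static void fill_send_roi_crc_v2(struct ta_securedisplay_cmd *cmd,
				 uint32_t phy_id, uint8_t roi_idx)
{
	cmd->cmd_id = TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2;
	cmd->securedisplay_in_message.send_roi_crc_v2.phy_id = phy_id;
	cmd->securedisplay_in_message.send_roi_crc_v2.roi_idx = roi_idx;
}
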
ta_ras_query_address_output *addr_out, + bool dump_addr) { - uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column, err_addr; - struct ta_ras_query_address_output addr_out; + uint32_t col, col_lower, row, row_lower, bank; + uint32_t channel_index = 0, umc_inst = 0; + uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS]; + uint64_t soc_pa, column, err_addr; + struct ta_ras_query_address_output addr_out_tmp; + struct ta_ras_query_address_output *paddr_out; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + int ret = 0; + + if (!addr_out) + paddr_out = &addr_out_tmp; + else + paddr_out = addr_out; - err_addr = addr_in->ma.err_addr; - addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); + err_addr = bank = 0; + if (addr_in) { + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out); + if (ret) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); - return; - } + goto out; + } - soc_pa = addr_out.pa.pa; - bank = addr_out.pa.bank; - channel_index = addr_out.pa.channel_idx; - - col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - row_xor = row ^ (0x1ULL << 13); - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, addr_in->ma.umc_inst); - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, addr_in->ma.umc_inst); + bank = paddr_out->pa.bank; + /* no need to care about umc inst if addr_in is NULL */ + umc_inst = addr_in->ma.umc_inst; } -} -static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, - struct ta_ras_query_address_output *addr_out, - uint64_t err_addr) -{ - uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column; - - soc_pa = addr_out->pa.pa; - bank = addr_out->pa.bank; - channel_index = addr_out->pa.channel_idx; - - col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - row_xor = row ^ (0x1ULL << 13); - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - 
dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); - } -} + loop_bits[0] = UMC_V12_0_PA_C2_BIT; + loop_bits[1] = UMC_V12_0_PA_C3_BIT; + loop_bits[2] = UMC_V12_0_PA_C4_BIT; + loop_bits[3] = UMC_V12_0_PA_R13_BIT; -static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, - uint64_t pa_addr, uint64_t *pfns, int len) -{ - uint64_t soc_pa, retired_page, column; - uint32_t pos = 0; - - soc_pa = pa_addr; - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - - if (pos >= len) - return 0; - pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - - if (pos >= len) - return 0; - pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS4_PARTITION_MODE) { + loop_bits[0] = UMC_V12_0_PA_CH4_BIT; + loop_bits[1] = UMC_V12_0_PA_CH5_BIT; + loop_bits[2] = UMC_V12_0_PA_B0_BIT; + loop_bits[3] = UMC_V12_0_PA_R11_BIT; } - return pos; -} - -static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev, - uint64_t err_addr, uint32_t ch, uint32_t umc, - uint32_t node, uint32_t socket, - uint64_t *addr, bool dump_addr) -{ - struct ta_ras_query_address_input addr_in; - struct ta_ras_query_address_output addr_out; - - memset(&addr_in, 0, sizeof(addr_in)); - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = ch; - addr_in.ma.umc_inst = umc; - addr_in.ma.node_inst = node; - addr_in.ma.socket_id = socket; - addr_in.addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); - return -EINVAL; + soc_pa = paddr_out->pa.pa; + channel_index = paddr_out->pa.channel_idx; + /* clear loop bits in soc physical address */ + for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) + soc_pa &= ~BIT_ULL(loop_bits[i]); + + paddr_out->pa.pa = soc_pa; + /* get column bit 0 and 1 in mca address */ + col_lower = (err_addr >> 1) & 0x3ULL; + /* MA_R13_BIT will be handled later */ + row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL; + + if (!err_data && !dump_addr) + goto out; + + /* loop for all possibilities of retired bits */ + for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) { + soc_pa = paddr_out->pa.pa; + for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) + soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]); + + col = ((column & 0x7) << 2) | col_lower; + /* add row bit 13 */ + row = ((column >> 3) << 13) | row_lower; + + if (dump_addr) + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + soc_pa, row, col, bank, channel_index); + + if (err_data) + 
amdgpu_umc_fill_error_record(err_data, err_addr, + soc_pa, channel_index, umc_inst); } - if (dump_addr) - umc_v12_0_dump_addr_info(adev, &addr_out, err_addr); - - *addr = addr_out.pa.pa; - - return 0; +out: + return ret; } static int umc_v12_0_query_error_address(struct amdgpu_device *adev, @@ -374,7 +314,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, addr_in.ma.umc_inst = umc_inst; addr_in.ma.node_inst = node_inst; - umc_v12_0_convert_error_address(adev, err_data, &addr_in); + umc_v12_0_convert_error_address(adev, err_data, &addr_in, NULL, true); } /* clear umc status */ @@ -526,6 +466,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; + struct ta_ras_query_address_output addr_out; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + uint32_t shift_bit = UMC_V12_0_PA_C4_BIT; int count, ret, i; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); @@ -552,10 +495,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, MCA_IPID_2_UMC_CH(ipid), err_addr); - ret = umc_v12_0_convert_mca_to_addr(adev, + ret = amdgpu_umc_mca_to_addr(adev, err_addr, MCA_IPID_2_UMC_CH(ipid), MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid), - MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true); + MCA_IPID_2_SOCKET_ID(ipid), &addr_out, true); if (ret) return ret; @@ -563,14 +506,21 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, if (!ecc_err) return -ENOMEM; + pa_addr = addr_out.pa.pa; ecc_err->status = status; ecc_err->ipid = ipid; ecc_err->addr = addr; - ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->channel_idx = addr_out.pa.channel_idx; + + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + if (nps == AMDGPU_NPS4_PARTITION_MODE) + shift_bit = UMC_V12_0_PA_B0_BIT; /* If converted pa_pfn is 0, use pa C4 pfn. 
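Aside: the rewritten umc_v12_0_convert_error_address() above replaces the hard-coded [C4 C3 C2]/R13 expansion with a table of four "loop bits" selected per NPS mode (C2/C3/C4/R13 for NPS1, CH4/CH5/B0/R11 for NPS4), then enumerates all 2^4 = 16 combinations. A standalone, runnable sketch of that expansion; the NPS1 bit positions are taken from umc_v12_0.h and the helper name is made up for illustration:

#include <stdint.h>
#include <stdio.h>

/* NPS1 loop bits from umc_v12_0.h: C2=15, C3=16, C4=21, R13=35
 * (NPS4 would instead use CH4=12, CH5=13, B0=19, R11=33).
 */
static const unsigned int nps1_loop_bits[4] = { 15, 16, 21, 35 };

/* Expand one bad soc physical address into the 16 candidate pages that
 * differ only in the looped bits, mirroring the retirement loop above.
 */
static void expand_bad_pages(uint64_t soc_pa)
{
	uint64_t base = soc_pa, page;
	unsigned int column, i;

	/* clear the looped bits to get the base address */
	for (i = 0; i < 4; i++)
		base &= ~(1ULL << nps1_loop_bits[i]);

	for (column = 0; column < 16; column++) {
		page = base;
		for (i = 0; i < 4; i++)
			page |= (uint64_t)((column >> i) & 0x1) << nps1_loop_bits[i];
		printf("retire page at 0x%llx\n", (unsigned long long)page);
	}
}

int main(void)
{
	expand_bad_pages(0x123456789ULL);	/* arbitrary example address */
	return 0;
}
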
*/ if (!ecc_err->pa_pfn) - ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT; ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); if (ret) { @@ -586,7 +536,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, con->umc_ecc_log.de_queried_count++; memset(page_pfn, 0, sizeof(page_pfn)); - count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, pa_addr, page_pfn, ARRAY_SIZE(page_pfn)); if (count <= 0) { @@ -629,7 +579,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, return -EINVAL; memset(page_pfn, 0, sizeof(page_pfn)); - count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT, page_pfn, ARRAY_SIZE(page_pfn)); @@ -637,7 +587,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, ret = amdgpu_umc_fill_error_record(err_data, ecc_err->addr, page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT, - MCA_IPID_2_UMC_CH(ecc_err->ipid), + ecc_err->channel_idx, MCA_IPID_2_UMC_INST(ecc_err->ipid)); if (ret) break; @@ -676,6 +626,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, mutex_unlock(&con->umc_ecc_log.lock); } +static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev, + uint64_t mca_addr, uint64_t retired_page) +{ + uint32_t die = 0; + + /* we only calculate die id for nps1 mode right now */ + die += ((((retired_page >> 12) & 0x1ULL)^ + ((retired_page >> 20) & 0x1ULL) ^ + ((retired_page >> 27) & 0x1ULL) ^ + ((retired_page >> 34) & 0x1ULL) ^ + ((retired_page >> 41) & 0x1ULL)) << 0); + + /* the original PA_C4 and PA_R13 may be cleared in retired_page, so + * get them from mca_addr. 
+ */ + die += ((((retired_page >> 13) & 0x1ULL) ^ + ((mca_addr >> 5) & 0x1ULL) ^ + ((retired_page >> 28) & 0x1ULL) ^ + ((mca_addr >> 23) & 0x1ULL) ^ + ((retired_page >> 42) & 0x1ULL)) << 1); + die &= 3; + + return die; +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, @@ -686,5 +661,7 @@ struct amdgpu_umc_ras umc_v12_0_ras = { .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr, .check_ecc_err_status = umc_v12_0_check_ecc_err_status, .update_ecc_status = umc_v12_0_update_ecc_status, + .convert_ras_err_addr = umc_v12_0_convert_error_address, + .get_die_id_from_pa = umc_v12_0_get_die_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index be5598d76c1d..9298018d938f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,12 +55,24 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) +/* C2, C3, C4, R13, four bits in MCA address are looped in retirement */ +#define UMC_V12_0_RETIRE_LOOP_BITS 4 /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 +#define UMC_V12_0_PA_C3_BIT 16 #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ +#define UMC_V12_0_PA_R0_BIT 22 +#define UMC_V12_0_PA_R11_BIT 33 #define UMC_V12_0_PA_R13_BIT 35 +/* channel bit in SOC physical address */ +#define UMC_V12_0_PA_CH4_BIT 12 +#define UMC_V12_0_PA_CH5_BIT 13 +/* bank bit in SOC physical address */ +#define UMC_V12_0_PA_B0_BIT 19 +/* row bits in MCA address */ +#define UMC_V12_0_MA_R0_BIT 10 #define MCA_UMC_HWID_V12_0 0x96 #define MCA_UMC_MCATYPE_V12_0 0x0 @@ -81,11 +93,6 @@ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) -#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \ - ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \ - (0x1ULL << UMC_V12_0_PA_C4_BIT) | \ - (0x1ULL << UMC_V12_0_PA_R13_BIT))) - bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c new file mode 100644 index 000000000000..eaca10a3c4a9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c @@ -0,0 +1,160 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v8_14.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_14_0_offset.h" +#include "umc/umc_8_14_0_sh_mask.h" + +static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst; +} + +static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_14_CE_CNT_INIT); + + return 0; +} + +static void umc_v8_14_clear_error_count(struct amdgpu_device *adev) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_clear_error_count_per_channel, NULL); +} + +static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + + /* UMC 8_14 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccErrCnt) - + UMC_V8_14_CE_CNT_INIT); +} + +static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + /* UMC 8_14 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) - + UMC_V8_14_CE_CNT_INIT); +} + +static int umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + umc_v8_14_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_14_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + + return 0; +} + +static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_query_error_count_per_channel, ras_error_status); + + umc_v8_14_clear_error_count(adev); +} + +static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to 
APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_14_CE_CNT_INIT); + + return 0; +} + +static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_err_cnt_init_per_channel, NULL); +} + +const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = { + .query_ras_error_count = umc_v8_14_query_ras_error_count, +}; + +struct amdgpu_umc_ras umc_v8_14_ras = { + .ras_block = { + .hw_ops = &umc_v8_14_ras_hw_ops, + }, + .err_cnt_init = umc_v8_14_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h new file mode 100644 index 000000000000..20a258f0017a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h @@ -0,0 +1,51 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __UMC_V8_14_H__ +#define __UMC_V8_14_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_14_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num) + +/* Total channel instances for all available umc nodes */ +#define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc) + +/* UMC register per channel offset */ +#define UMC_V8_14_PER_CHANNEL_OFFSET 0x400 + +#define UMC_V8_14_INST_DIST 0x40000 + +/* EccErrCnt max value */ +#define UMC_V8_14_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UMC_V8_14_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD) + +extern struct amdgpu_umc_ras umc_v8_14_ras; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index bdbca25d80c4..5830e799c0a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -790,13 +790,13 @@ static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block) return uvd_v3_1_start(adev); } -static int uvd_v3_1_set_clockgating_state(void *handle, +static int uvd_v3_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int uvd_v3_1_set_powergating_state(void *handle, +static int uvd_v3_1_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index a836dc9cfcad..f93079e09215 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -44,7 +44,7 @@ static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v4_2_start(struct amdgpu_device *adev); static void uvd_v4_2_stop(struct amdgpu_device *adev); -static int uvd_v4_2_set_clockgating_state(void *handle, +static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, bool sw_mode); @@ -708,13 +708,13 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev, return 0; } -static int uvd_v4_2_set_clockgating_state(void *handle, +static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int uvd_v4_2_set_powergating_state(void *handle, +static int uvd_v4_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. 
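Aside: the new umc_v8_14 code indexes per-channel registers with get_umc_v8_14_reg_offset(), i.e. channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst. With the values from umc_v8_14.h above (per-channel offset 0x400, instance distance 0x40000) the arithmetic is easy to check by hand; a worked example, with the instance/channel numbers chosen arbitrarily:

/* channel_offs is assumed to be initialised to
 * UMC_V8_14_PER_CHANNEL_OFFSET (0x400) by the UMC setup code.
 *
 *   umc_inst = 1, ch_inst = 1:  0x400 * 1 + 0x40000 * 1 = 0x40400
 *   umc_inst = 0, ch_inst = 1:  0x400 * 1 + 0x40000 * 0 = 0x00400
 *
 * The resulting dword offset is scaled by 4 before the PCIE access, e.g.:
 *   WREG32_PCIE((ecc_err_cnt_addr + 0x40400) * 4, UMC_V8_14_CE_CNT_INIT);
 *
 * Note that with the values in the header, UMC_V8_14_CE_CNT_INIT works
 * out to 0xffff - 0xffff = 0, so counters start from zero.
 */
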
@@ -724,7 +724,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) { uvd_v4_2_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index ab55fae3569e..050a0f309390 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -42,7 +42,7 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v5_0_start(struct amdgpu_device *adev); static void uvd_v5_0_stop(struct amdgpu_device *adev); -static int uvd_v5_0_set_clockgating_state(void *handle, +static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, bool enable); @@ -155,7 +155,7 @@ static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block) int r; amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); - uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + uvd_v5_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); uvd_v5_0_enable_mgcg(adev, true); r = amdgpu_ring_test_helper(ring); @@ -790,16 +790,11 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, } } -static int uvd_v5_0_set_clockgating_state(void *handle, +static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); - struct amdgpu_ip_block *ip_block; - - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); - if (!ip_block) - return -EINVAL; if (enable) { /* wait for STATUS to clear */ @@ -817,7 +812,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle, return 0; } -static int uvd_v5_0_set_powergating_state(void *handle, +static int uvd_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. 
@@ -827,7 +822,7 @@ static int uvd_v5_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; if (state == AMD_PG_STATE_GATE) { diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 39f8c3d3a135..d9d036ee51fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -48,7 +48,7 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v6_0_start(struct amdgpu_device *adev); static void uvd_v6_0_stop(struct amdgpu_device *adev); static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev); -static int uvd_v6_0_set_clockgating_state(void *handle, +static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, bool enable); @@ -467,7 +467,7 @@ static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block) int i, r; amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); - uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + uvd_v6_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); uvd_v6_0_enable_mgcg(adev, true); r = amdgpu_ring_test_helper(ring); @@ -1450,17 +1450,12 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, } } -static int uvd_v6_0_set_clockgating_state(void *handle, +static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ip_block *ip_block; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); - if (!ip_block) - return -EINVAL; - if (enable) { /* wait for STATUS to clear */ if (uvd_v6_0_wait_for_idle(ip_block)) @@ -1476,7 +1471,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle, return 0; } -static int uvd_v6_0_set_powergating_state(void *handle, +static int uvd_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. 
@@ -1486,7 +1481,7 @@ static int uvd_v6_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 079131aeb2f7..9d237b5937fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1288,7 +1288,7 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, struct amdgpu_ib *ib) { - struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched); + struct amdgpu_ring *ring = amdgpu_job_ring(job); unsigned i; /* No patching necessary for the first instance */ @@ -1511,7 +1511,7 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int uvd_v7_0_set_clockgating_state(void *handle, +static int uvd_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* needed for driver unload*/ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index c1ed91b39415..c633b7ff2943 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -578,13 +578,13 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int vce_v2_0_set_clockgating_state(void *handle, +static int vce_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; bool sw_cg = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) { gate = true; @@ -596,7 +596,7 @@ static int vce_v2_0_set_clockgating_state(void *handle, return 0; } -static int vce_v2_0_set_powergating_state(void *handle, +static int vce_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. 
@@ -606,7 +606,7 @@ static int vce_v2_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) return vce_v2_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6bb318a06f19..f8bddcd19b68 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -65,7 +65,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx); static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev); static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block); -static int vce_v3_0_set_clockgating_state(void *handle, +static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); /** * vce_v3_0_ring_get_rptr - get read pointer @@ -497,7 +497,7 @@ static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) return r; vce_v3_0_stop(adev); - return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE); + return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE); } static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block) @@ -760,10 +760,10 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int vce_v3_0_set_clockgating_state(void *handle, +static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); int i; @@ -801,7 +801,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, return 0; } -static int vce_v3_0_set_powergating_state(void *handle, +static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. @@ -811,7 +811,7 @@ static int vce_v3_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; if (state == AMD_PG_STATE_GATE) { diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 79ee555768a5..335bda64ff5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -684,14 +684,14 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev) ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); } -static int vce_v4_0_set_clockgating_state(void *handle, +static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* needed for driver unload*/ return 0; } -static int vce_v4_0_set_powergating_state(void *handle, +static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. 
@@ -701,7 +701,7 @@ static int vce_v4_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) return vce_v4_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 10e99c926fb8..5ea96c983517 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -85,7 +85,8 @@ static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); +static int vcn_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -281,7 +282,7 @@ static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, 0, mmUVD_STATUS))) { - vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v1_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } return 0; @@ -303,7 +304,7 @@ static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block) idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); if (idle_work_unexecuted) { if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); } r = vcn_v1_0_hw_fini(ip_block); @@ -344,7 +345,7 @@ static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block) */ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -411,7 +412,7 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1394,15 +1395,15 @@ static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v1_0_set_clockgating_state(void *handle, +static int vcn_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ - if (!vcn_v1_0_is_idle(handle)) + if (!vcn_v1_0_is_idle(adev)) return -EBUSY; vcn_v1_0_enable_clock_gating(adev); } else { @@ -1799,7 +1800,7 @@ static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun } } -static int vcn_v1_0_set_powergating_state(void *handle, +static int vcn_v1_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCN block. 
@@ -1810,7 +1811,7 @@ static int vcn_v1_0_set_powergating_state(void *handle, * the smc and the hw blocks */ int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == adev->vcn.cur_state) return 0; @@ -1856,7 +1857,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); @@ -1886,7 +1887,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks) if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, 0); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e0322cbca3ec..e42cfc731ad8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -92,7 +92,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = { static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v2_0_set_powergating_state(void *handle, +static int vcn_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -318,7 +318,7 @@ static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, 0, mmUVD_STATUS))) - vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -372,7 +372,7 @@ static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block) */ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; if (amdgpu_sriov_vf(adev)) @@ -428,7 +428,7 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -978,7 +978,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) int i, j, r; if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, 0); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); @@ -1235,7 +1235,7 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev) power_off: if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); return 0; } @@ -1335,10 +1335,10 @@ static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v2_0_set_clockgating_state(void *handle, +static int vcn_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -1346,7 +1346,7 @@ static int vcn_v2_0_set_clockgating_state(void *handle, if (enable) { /* wait for STATUS to clear */ - if (!vcn_v2_0_is_idle(handle)) + if (!vcn_v2_0_is_idle(adev)) return -EBUSY; vcn_v2_0_enable_clock_gating(adev); } else { @@ -1796,7 +1796,7 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) } -static int vcn_v2_0_set_powergating_state(void *handle, +static int vcn_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCN block. @@ -1807,7 +1807,7 @@ static int vcn_v2_0_set_powergating_state(void *handle, * the smc and the hw blocks */ int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) { adev->vcn.cur_state = AMD_PG_STATE_UNGATE; @@ -1920,7 +1920,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) init_table += header->vcn_table_offset; - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT( SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 6aa08281d094..b518202955ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -95,7 +95,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v2_5_set_powergating_state(void *handle, +static int vcn_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -399,7 +399,7 @@ static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, mmUVD_STATUS))) - vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0); @@ -465,7 +465,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, @@ -514,7 +514,7 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1012,8 +1012,10 @@ static int vcn_v2_5_start(struct amdgpu_device *adev) uint32_t rb_bufsz, tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < 
adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1285,7 +1287,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V1_0_INSERT_DIRECT_WT( @@ -1485,8 +1487,10 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev) ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -1778,6 +1782,7 @@ static bool vcn_v2_5_is_idle(void *handle) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); } @@ -1801,17 +1806,17 @@ static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v2_5_set_clockgating_state(void *handle, +static int vcn_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (amdgpu_sriov_vf(adev)) return 0; if (enable) { - if (!vcn_v2_5_is_idle(handle)) + if (!vcn_v2_5_is_idle(adev)) return -EBUSY; vcn_v2_5_enable_clock_gating(adev); } else { @@ -1821,10 +1826,10 @@ static int vcn_v2_5_set_clockgating_state(void *handle, return 0; } -static int vcn_v2_5_set_powergating_state(void *handle, +static int vcn_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 6732ad7f16f5..63ddd4cca910 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -105,7 +105,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev); static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v3_0_set_powergating_state(void *handle, +static int vcn_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -430,9 +430,9 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, mmUVD_STATUS))) { - vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) { + vcn_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } } @@ -490,7 +490,7 @@ static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block) */ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) { - 
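Aside: several VCN start/stop paths in this series switch from a single amdgpu_dpm_enable_uvd(adev, enable) call to enabling DPM per VCN instance with amdgpu_dpm_enable_vcn(adev, enable, inst). A hedged sketch of the resulting pattern; the helper name is hypothetical, the loop body mirrors the hunks above:

/* Illustrative only: per-instance VCN DPM toggle as used by the
 * vcn_v2_5/v3_0/v4_0 start() and stop() paths in this series.
 */
static void vcn_dpm_set_all(struct amdgpu_device *adev, bool enable)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->pm.dpm_enabled)
			amdgpu_dpm_enable_vcn(adev, enable, i);
	}
}
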
uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -540,7 +540,7 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -1141,8 +1141,10 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) uint32_t rb_bufsz, tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1373,7 +1375,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -1632,8 +1634,10 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev) vcn_v3_0_enable_static_power_gating(adev, i); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -2132,10 +2136,10 @@ static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v3_0_set_clockgating_state(void *handle, +static int vcn_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; @@ -2155,10 +2159,10 @@ static int vcn_v3_0_set_clockgating_state(void *handle, return 0; } -static int vcn_v3_0_set_powergating_state(void *handle, +static int vcn_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; /* for SRIOV, guest should not control VCN Power-gating diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index fcc8511e91ee..00551d6f0370 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -96,7 +96,7 @@ static int amdgpu_ih_clientid_vcns[] = { static int vcn_v4_0_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_set_powergating_state(void *handle, +static int vcn_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -366,9 +366,9 @@ static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) continue; if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, 
regUVD_STATUS))) { - vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (adev->vcn.cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, regUVD_STATUS))) { + vcn_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) @@ -431,7 +431,7 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -491,7 +491,7 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx { uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -1097,8 +1097,10 @@ static int vcn_v4_0_start(struct amdgpu_device *adev) uint32_t tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1341,7 +1343,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -1623,8 +1625,10 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) vcn_v4_0_enable_static_power_gating(adev, i); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -2007,14 +2011,15 @@ static int vcn_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; @@ -2037,14 +2042,15 @@ static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_sta /** * vcn_v4_0_set_powergating_state - set VCN block powergating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: power gating state * * Set VCN block powergating state */ -static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; /* for SRIOV, guest should not 
control VCN Power-gating diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3f69b9b2bcd0..ecdc027f8220 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -87,7 +87,7 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_3_set_powergating_state(void *handle, +static int vcn_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -349,7 +349,7 @@ static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) cancel_delayed_work_sync(&adev->vcn.idle_work); if (adev->vcn.cur_state != AMD_PG_STATE_GATE) - vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -407,7 +407,7 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) uint32_t offset, size, vcn_inst; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); vcn_inst = GET_INST(VCN, inst_idx); @@ -482,7 +482,7 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -957,6 +957,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { vcn_inst = GET_INST(VCN, i); + vcn_v4_0_3_fw_shared_init(adev, vcn_inst); + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header)); header.version = MMSCH_VERSION; header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; @@ -969,7 +971,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, @@ -1121,8 +1123,10 @@ static int vcn_v4_0_3_start(struct amdgpu_device *adev) int i, j, k, r, vcn_inst; uint32_t tmp; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1395,8 +1399,10 @@ static int vcn_v4_0_3_stop(struct amdgpu_device *adev) vcn_v4_0_3_enable_clock_gating(adev, i); } Done: - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -1616,15 
+1622,15 @@ static int vcn_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block) /* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_3_set_clockgating_state(void *handle, +static int vcn_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; @@ -1644,15 +1650,15 @@ static int vcn_v4_0_3_set_clockgating_state(void *handle, /** * vcn_v4_0_3_set_powergating_state - set VCN block powergating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: power gating state * * Set VCN block powergating state */ -static int vcn_v4_0_3_set_powergating_state(void *handle, +static int vcn_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; /* for SRIOV, guest should not control VCN Power-gating @@ -1911,9 +1917,94 @@ static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = { .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count, }; +static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int vcn_v4_0_3_err_codes[] = { + 14, 15, /* VCN */ +}; + +static bool vcn_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + vcn_v4_0_3_err_codes, + ARRAY_SIZE(vcn_v4_0_3_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops vcn_v4_0_3_aca_bank_ops = { + .aca_bank_parser = vcn_v4_0_3_aca_bank_parser, + .aca_bank_is_valid = vcn_v4_0_3_aca_bank_is_valid, +}; + +static const struct aca_info vcn_v4_0_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &vcn_v4_0_3_aca_bank_ops, +}; + +static int vcn_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN, + &vcn_v4_0_3_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + static struct amdgpu_vcn_ras vcn_v4_0_3_ras = { .ras_block = { .hw_ops = &vcn_v4_0_3_ras_hw_ops, + .ras_late_init = vcn_v4_0_3_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c 
b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 71961fb3f7ff..23d3c16c9d9f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -95,7 +95,7 @@ static int amdgpu_ih_clientid_vcns[] = { static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_5_set_powergating_state(void *handle, +static int vcn_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -309,7 +309,7 @@ static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, regUVD_STATUS))) { - vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } } @@ -370,7 +370,7 @@ static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -431,7 +431,7 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -1000,8 +1000,10 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) uint32_t tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1277,8 +1279,10 @@ static int vcn_v4_0_5_stop(struct amdgpu_device *adev) vcn_v4_0_5_enable_static_power_gating(adev, i); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -1492,14 +1496,15 @@ static int vcn_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_5_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; int i; @@ -1522,14 +1527,15 @@ static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_s /** * vcn_v4_0_5_set_powergating_state - set VCN block powergating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: power gating state * * Set VCN block powergating state */ -static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->vcn.cur_state) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index bd3d2bbdc16b..b6d78381ebfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -32,7 +32,7 @@ #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" -#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" #include "vcn_v5_0_0.h" #include <drm/drm_drv.h> @@ -78,7 +78,7 @@ static int amdgpu_ih_clientid_vcns[] = { static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v5_0_0_set_powergating_state(void *handle, +static int vcn_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state); @@ -105,6 +105,21 @@ static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block) return amdgpu_vcn_early_init(adev); } +void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; + + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } +} + /** * vcn_v5_0_0_sw_init - sw init for VCN block * @@ -117,8 +132,6 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; struct amdgpu_device *adev = ip_block->adev; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t *ptr; r = amdgpu_vcn_sw_init(adev); if (r) @@ -140,13 +153,13 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); if (r) return r; /* VCN POISON TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); if (r) return r; @@ -177,14 +190,7 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + vcn_v5_0_0_alloc_ip_dump(adev); r = 
amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -283,7 +289,7 @@ static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || (adev->vcn.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, regUVD_STATUS))) { - vcn_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + vcn_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } } @@ -344,7 +350,7 @@ static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -405,7 +411,7 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -771,8 +777,10 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev) uint32_t tmp; int i, j, k, r; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, true, i); + } for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) @@ -1018,8 +1026,10 @@ static int vcn_v5_0_0_stop(struct amdgpu_device *adev) vcn_v5_0_0_enable_static_power_gating(adev, i); } - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_vcn(adev, false, i); + } return 0; } @@ -1229,14 +1239,15 @@ static int vcn_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; int i; @@ -1259,14 +1270,15 @@ static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_s /** * vcn_v5_0_0_set_powergating_state - set VCN block powergating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: power gating state * * Set VCN block powergating state */ -static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->vcn.cur_state) @@ -1312,10 +1324,10 @@ static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgp DRM_DEBUG("IH: VCN TRAP\n"); switch (entry->src_id) { - case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; - case VCN_4_0__SRCID_UVD_POISON: + case VCN_5_0__SRCID_UVD_POISON: amdgpu_vcn_process_poison_irq(adev, source, entry); break; default: @@ -1351,7 +1363,8 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) } } -static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, + struct drm_printer *p) { struct amdgpu_device *adev = ip_block->adev; int i, j; @@ -1383,7 +1396,7 @@ static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm } } -static void vcn_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; int i, j; @@ -1424,8 +1437,8 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .wait_for_idle = vcn_v5_0_0_wait_for_idle, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = vcn_v5_0_dump_ip_state, - .print_ip_state = vcn_v5_0_print_ip_state, + .dump_ip_state = vcn_v5_0_0_dump_ip_state, + .print_ip_state = vcn_v5_0_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h index 51bbccd4360f..b8927652bc50 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h @@ -32,6 +32,11 @@ #define VCN_VID_IP_ADDRESS 0x0 #define VCN_AON_IP_ADDRESS 0x30000 +void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev); +void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, + struct drm_printer *p); +void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block); + extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; #endif /* __VCN_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c new file mode 100644 index 000000000000..8b463c977d08 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -0,0 +1,1118 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" +#include "vcn_v5_0_0.h" +#include "vcn_v5_0_1.h" + +#include <drm/drm_drv.h> + +static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); +static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v5_0_1_early_init - set function pointers and load microcode + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + /* re-use enc ring as unified ring */ + adev->vcn.num_enc_rings = 1; + + vcn_v5_0_1_set_unified_ring_funcs(adev); + vcn_v5_0_1_set_irq_funcs(adev); + + return amdgpu_vcn_early_init(adev); +} + +/** + * vcn_v5_0_1_sw_init - sw init for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Load firmware and sw initialization + */ +static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + r = amdgpu_vcn_sw_init(adev); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev); + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + + vcn_inst = GET_INST(VCN, i); + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst; + + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); + sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = true; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + } + + /* TODO: Add queue reset mask when FW fully supports it */ + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + vcn_v5_0_0_alloc_ip_dump(adev); + + return amdgpu_vcn_sysfs_reset_mask_init(adev); +} + +/** + * vcn_v5_0_1_sw_fini - sw fini for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * VCN suspend and free up sw allocation + */ +static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + amdgpu_vcn_sysfs_reset_mask_fini(adev); + + kfree(adev->vcn.ip_dump); + + return r; +} + +/** + * vcn_v5_0_1_hw_init - start and test VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst), + adev->vcn.inst[i].aid_id); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + return 0; +} + +/** + * vcn_v5_0_1_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + return 0; +} + +/** + * vcn_v5_0_1_suspend - suspend VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend VCN block + */ +static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = vcn_v5_0_1_hw_fini(ip_block); + if (r) + return r; + + r = amdgpu_vcn_suspend(adev); + + return r; +} + +/** + * vcn_v5_0_1_resume - resume VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Resume firmware and hw init VCN block + */ +static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + r = vcn_v5_0_1_hw_init(ip_block); + + return r; +} + +/** + * vcn_v5_0_1_mc_resume - memory controller programming + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v5_0_1_mc_resume(struct amdgpu_device *adev, int inst) +{ + uint32_t offset, size, vcn_inst; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + vcn_inst = GET_INST(VCN, inst); + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, 
+ upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared))); +} + +/** + * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC24_DPG_MODE(inst_idx, 
SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn4_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Disable clock gating for VCN block + */ +static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_device *adev, int inst) +{ +} + +/** + * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @inst: instance number + * + * Enable clock gating for VCN block + */ +static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_device *adev, int inst) +{ +} + +/** + * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared = + adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + int vcn_inst; + uint32_t tmp; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); + + if (indirect) { + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */ + WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF, + adev->vcn.inst[inst_idx].aid_id, 0, true); + } + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, 
SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL), tmp, 0, indirect); + + vcn_v5_0_1_mc_resume_dpg_mode(adev, inst_idx, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + /* Read DB_CTRL to flush the write DB_CTRL command. 
*/ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + return 0; +} + +/** + * vcn_v5_0_1_start - VCN start + * + * @adev: amdgpu_device pointer + * + * Start VCN block + */ +static int vcn_v5_0_1_start(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int i, j, k, r, vcn_inst; + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, true); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v5_0_1_start_dpg_mode(adev, i, adev->vcn.indirect_sram); + continue; + } + + vcn_inst = GET_INST(VCN, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_1_mc_resume(adev, i); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + if (status & 2) + break; + mdelay(100); + if (amdgpu_emu_mode == 1) + msleep(20); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); 
+ + /* Read DB_CTRL to flush the write DB_CTRL command. */ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + } + + return 0; +} + +/** + * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode + * + * @adev: amdgpu_device pointer + * @inst_idx: instance number index + * + * Stop VCN block with dpg mode + */ +static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +{ + uint32_t tmp; + int vcn_inst; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); +} + +/** + * vcn_v5_0_1_stop - VCN stop + * + * @adev: amdgpu_device pointer + * + * Stop VCN block + */ +static int vcn_v5_0_1_stop(struct amdgpu_device *adev) +{ + volatile struct amdgpu_vcn4_fw_shared *fw_shared; + uint32_t tmp; + int i, r = 0, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_1_stop_dpg_mode(adev, i); + continue; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, 
regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); + } + + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + + return 0; +} + +/** + * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR); +} + +/** + * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR); +} + +/** + * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR, + lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v5_0_1_unified_ring_get_rptr, + .get_wptr = vcn_v5_0_1_unified_ring_get_wptr, + .set_wptr = vcn_v5_0_1_unified_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device 
pointer + * + * Set unified ring functions + */ +static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_1_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + vcn_inst = GET_INST(VCN, i); + adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid; + } +} + +/** + * vcn_v5_0_1_is_idle - check VCN block is idle + * + * @handle: amdgpu_device pointer + * + * Check whether VCN block is idle + */ +static bool vcn_v5_0_1_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE); + + return ret; +} + +/** + * vcn_v5_0_1_wait_for_idle - wait for VCN block idle + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Wait for VCN block idle + */ +static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/** + * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = state == AMD_CG_STATE_GATE; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (enable) { + if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v5_0_1_enable_clock_gating(adev, i); + } else { + vcn_v5_0_1_disable_clock_gating(adev, i); + } + } + + return 0; +} + +/** + * vcn_v5_0_1_set_powergating_state - set VCN block powergating state + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * @state: power gating state + * + * Set VCN block powergating state + */ +static int vcn_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret; + + if (state == adev->vcn.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v5_0_1_stop(adev); + else + ret = vcn_v5_0_1_start(adev); + + if (!ret) + adev->vcn.cur_state = state; + + return ret; +} + +/** + * vcn_v5_0_1_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t i, inst; + + i = node_id_to_phys_map[entry->node_id]; + + DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n"); + + for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) + if (adev->vcn.inst[inst].aid_id == i) + break; + if (inst >= adev->vcn.num_vcn_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown VCN instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs vcn_v5_0_1_irq_funcs = { + .process = vcn_v5_0_1_process_interrupt, +}; + +/** + * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + adev->vcn.inst->irq.num_types++; + adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs; +} + +static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { + .name = "vcn_v5_0_1", + .early_init = vcn_v5_0_1_early_init, + .late_init = NULL, + .sw_init = vcn_v5_0_1_sw_init, + .sw_fini = vcn_v5_0_1_sw_fini, + .hw_init = vcn_v5_0_1_hw_init, + .hw_fini = vcn_v5_0_1_hw_fini, + .suspend = vcn_v5_0_1_suspend, + .resume = vcn_v5_0_1_resume, + .is_idle = vcn_v5_0_1_is_idle, + .wait_for_idle = vcn_v5_0_1_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v5_0_1_set_clockgating_state, + .set_powergating_state = vcn_v5_0_1_set_powergating_state, + .dump_ip_state = vcn_v5_0_0_dump_ip_state, + .print_ip_state = vcn_v5_0_0_print_ip_state, +}; + +const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 5, + .minor = 0, + .rev = 1, + .funcs = &vcn_v5_0_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h new file mode 100644 index 000000000000..82ac709f44bf --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h @@ -0,0 +1,29 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_v5_0_1_H__ +#define __VCN_v5_0_1_H__ + +extern const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block; + +#endif /* __VCN_v5_0_1_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 0fedadd0a6a4..98fc6941159e 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -364,9 +364,8 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catchup. */ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -605,10 +604,10 @@ static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int vega10_ih_set_clockgating_state(void *handle, +static int vega10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vega10_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -616,7 +615,7 @@ static int vega10_ih_set_clockgating_state(void *handle, } -static int vega10_ih_set_powergating_state(void *handle, +static int vega10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 1c9aff742e43..e9e3b2ed4b7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -366,6 +366,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) /* Enable IH Retry CAM */ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) || amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5)) WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN, ENABLE, 1); @@ -443,9 +444,8 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catchup. 
*/ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -697,10 +697,10 @@ static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int vega20_ih_set_clockgating_state(void *handle, +static int vega20_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vega20_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -708,7 +708,7 @@ static int vega20_ih_set_clockgating_state(void *handle, } -static int vega20_ih_set_powergating_state(void *handle, +static int vega20_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index a83505815d39..06615f160331 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1945,10 +1945,10 @@ static int vi_common_set_clockgating_state_by_smu(void *handle, return 0; } -static int vi_common_set_clockgating_state(void *handle, +static int vi_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1988,7 +1988,7 @@ static int vi_common_set_clockgating_state(void *handle, return 0; } -static int vi_common_set_powergating_state(void *handle, +static int vi_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 02f7ba8c93cd..388b44ed5928 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -4122,3 +4122,494 @@ static const uint32_t cwsr_trap_gfx12_hex[] = { 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0x00000000, }; + +static const uint32_t cwsr_trap_gfx9_5_0_hex[] = { + 0xbf820001, 0xbf8202d8, + 0xb8f8f802, 0x8978ff78, + 0x00020006, 0xb8fbf803, + 0x866eff78, 0x00002000, + 0xbf840008, 0xbf0d986d, + 0xbf85001f, 0x866eff7b, + 0x00000400, 0xbf850061, + 0xbf8e0010, 0xb8fbf803, + 0xbf82fffa, 0x866eff7b, + 0x03800900, 0xbf850015, + 0x866eff7b, 0x000071ff, + 0xbf840008, 0x866fff7b, + 0x00007080, 0xbf840001, + 0xbeee1a87, 0xb8eff801, + 0x8e6e8c6e, 0x866e6f6e, + 0xbf85000a, 0xbf0d986d, + 0xbf850003, 0x866eff6d, + 0x00ff0000, 0xbf850005, + 0xbf0d986d, 0xbf850004, + 0x866eff7b, 0x00000400, + 0xbf850046, 0xbeed1a9d, + 0xb8faf807, 0x867aff7a, + 0x001f8000, 0x8e7a8b7a, + 0x8979ff79, 0xfc000000, + 0x87797a79, 0xba7ff807, + 0x00000000, 0xb8faf812, + 0xb8fbf813, 0x8efa887a, + 0xbf0d8f7b, 0xbf840002, + 0x877bff7b, 0xffff0000, + 0xc0031cfd, 0x00000010, + 0xc0071bbd, 0x00000000, + 0xc0071ebd, 0x00000008, + 0xbf8cc07f, 0x8e739773, + 0x8979ff79, 0x01800000, + 0x87797379, 0xbf0d986d, + 0xbf840009, 0xbf0d9879, + 0xbf850007, 0x896dff6d, + 0x01ff0000, 0xba7f0583, + 0x00000000, 0xbf0d9d6d, + 0xbeed189d, 0xbf840012, + 0xbef91898, 0xbeed189d, + 0x86ee6e6e, 0xbf840001, + 0xbe801d6e, 0x866eff6d, + 0x01ff0000, 0xbf850005, + 0x8778ff78, 0x00002000, + 
0x80ec886c, 0x82ed806d, + 0xbf820005, 0x866eff6d, + 0x01000000, 0xbf850002, + 0x806c846c, 0x826d806d, + 0x866dff6d, 0x0000ffff, + 0x8f7a8b79, 0x867aff7a, + 0x001f8000, 0xb97af807, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e8378, 0xb96ee0c2, + 0xbf800002, 0xb9780002, + 0xbe801f6c, 0x866dff6d, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8faf807, + 0x867aff7a, 0x001f8000, + 0x8e7a8b7a, 0x8979ff79, + 0xfc000000, 0x87797a79, + 0xba7ff807, 0x00000000, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x877a8478, 0xb97af802, + 0xbf8e0002, 0xbf88fffe, + 0xb8fa2985, 0x807a817a, + 0x8e7a8a7a, 0x8e7a817a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0xbef1007c, + 0xbef00080, 0xb8f02985, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611b3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611bba, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bfa, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8fbf803, 0xbefe007c, + 0xbefc0070, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8f1f801, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf85004d, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 
0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb5306, 0x867bc17b, + 0xbf840052, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf84004e, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02985, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf85001d, + 0x24040682, 0xd86c0000, + 0x00000002, 0xbf8cc07f, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x680404ff, + 0x00000100, 0xd0c9006a, + 0x0000f702, 0xbf87ffe5, + 0xbf820016, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe800077, + 0x8677ff77, 0xff7fffff, + 0x8777ff77, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8cc07f, 0xe0765000, + 0x701d0002, 0x68040702, + 0xd0c9006a, 0x0000f702, + 0xbefe016a, 0xbf87fff6, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2b05, + 0x807b817b, 0x8e7b827b, + 0xbef600ff, 0x01000000, + 0xbefc0084, 0xbf0a7b7c, + 0xbf84006d, 0xbf11017c, + 0x807bff7b, 0x00001000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf850051, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0x807c847c, + 0xbf0a7b7c, 0xbf85ffb1, + 0xbf9c0000, 0xbf820012, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0x807c847c, 0x8070ff70, + 0x00000400, 0xbf0a7b7c, + 0xbf85ffef, 0xbf9c0000, + 0xb8fb2985, 0x807b817b, + 0x8e7b837b, 0xb8fa2b05, + 0x807a817a, 0x8e7a827a, + 0x80fb7a7b, 0x867b7b7b, + 0xbf84007a, 0x807bff7b, + 0x00001000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 
0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf8200f4, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x04000000, + 0xbf840025, 0xbefe00c1, + 0xbeff00c1, 0xb8ef5306, + 0x866fc16f, 0xbf840020, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82985, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0xe0510200, 0x781d0000, + 0xe0510300, 0x781d0000, + 0xe0510400, 0x781d0000, + 0x807cff7c, 0x00000500, + 0x8078ff78, 0x00000500, + 0xbf0a6f7c, 0xbf85fff0, + 0xbefe00c1, 0xbeff00c1, + 0xbef600ff, 0x01000000, + 0xb8ef2b05, 0x806f816f, + 0x8e6f826f, 0x806fff6f, + 0x00008000, 0xbef80080, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xb8ef2985, + 0x806f816f, 0x8e6f836f, + 0xb8f92b05, 0x80798179, + 0x8e798279, 0x80ef796f, + 0x866f6f6f, 0xbf84001a, + 0x806fff6f, 0x00008000, + 0xbefc0080, 0xbf11087c, + 0xe0524000, 0x781d0000, + 0xe0524100, 0x781d0100, + 0xe0524200, 0x781d0200, + 0xe0524300, 0x781d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffea, + 0xbf9c0000, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xbf8c0f70, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x80f8c078, 0xb8ef1605, + 0x806f816f, 0x8e6f846f, + 0x8e76826f, 0xbef600ff, + 0x01000000, 0xbefc006f, + 0xc031003a, 0x00000078, + 0x80f8c078, 0xbf8cc07f, + 0x80fc907c, 0xbf800000, + 0xbe802d00, 0xbe822d02, + 0xbe842d04, 0xbe862d06, + 0xbe882d08, 0xbe8a2d0a, + 0xbe8c2d0c, 0xbe8e2d0e, + 0xbf06807c, 0xbf84fff0, + 0xb8f82985, 0x80788178, + 0x8e788a78, 0x8e788178, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, + 0x00000078, 0x80788478, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, + 0x00000078, 0x80788478, + 0xc0211c3a, 
0x00000078, + 0x80788478, 0xc0211c7a, + 0x00000078, 0x80788478, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, + 0x00000078, 0x80788478, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, + 0x00000078, 0x80788478, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2985, + 0x806e816e, 0x8e6e8a6e, + 0x8e6e816e, 0xb8ef1605, + 0x806f816f, 0x8e6f866f, + 0x806e6f6e, 0x806e746e, + 0x826f8075, 0x866fff6f, + 0x0000ffff, 0xc00b1c37, + 0x00000050, 0xc00b1d37, + 0x00000060, 0xc0031e77, + 0x00000074, 0xbf8cc07f, + 0x8f6e8b79, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0xbe801f6c, + 0xbf9b0000, 0x00000000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index bb26338204f4..0eabb7a8cab9 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -37,17 +37,28 @@ * gc_9_4_3: * cpp -DASIC_FAMILY=GC_9_4_3 cwsr_trap_handler_gfx9.asm -P -o gc_9_4_3.sp3 * sp3 gc_9_4_3.sp3 -hex gc_9_4_3.hex + * + * gc_9_5_0: + * cpp -DASIC_FAMILY=GC_9_5_0 cwsr_trap_handler_gfx9.asm -P -o gc_9_5_0.sp3 + * sp3 gc_9_5_0.sp3 -hex gc_9_5_0.hex */ #define CHIP_VEGAM 18 #define CHIP_ARCTURUS 23 #define CHIP_ALDEBARAN 25 #define CHIP_GC_9_4_3 26 +#define CHIP_GC_9_5_0 27 var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger var SINGLE_STEP_MISSED_WORKAROUND = (ASIC_FAMILY <= CHIP_ALDEBARAN) //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised +#if ASIC_FAMILY < CHIP_GC_9_4_3 +#define VMEM_MODIFIERS slc:1 glc:1 +#else +#define VMEM_MODIFIERS sc0:1 nt:1 +#endif + /**************************************************************************/ /* variables */ /**************************************************************************/ @@ -62,7 +73,13 @@ var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000 var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +#if ASIC_FAMILY >= CHIP_GC_9_5_0 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 11 +var LDS_RESTORE_GRANULARITY_BYTES = 1280 +#else var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var LDS_RESTORE_GRANULARITY_BYTES = 512 +#endif var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 @@ -557,12 +574,21 @@ if SAVE_AFTER_XNACK_ERROR v_lshlrev_b32 v2, 2, v3 L_SAVE_LDS_LOOP_SQC: +#if ASIC_FAMILY < CHIP_GC_9_5_0 ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40 s_waitcnt lgkmcnt(0) - write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset) v_add_u32 v2, 0x200, v2 +#else + // gfx950 needs to save in multiple of 256 bytes. 
+ ds_read_b32 v0, v2 + s_waitcnt lgkmcnt(0) + write_vgprs_to_mem_with_sqc(v0, 1, s_save_buf_rsrc0, s_save_mem_offset) + + v_add_u32 v2, 0x100, v2 +#endif + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC @@ -581,11 +607,14 @@ end L_SAVE_LDS_LOOP_VECTOR: ds_read_b64 v[0:1], v2 //x =LDS[a], byte address s_waitcnt lgkmcnt(0) - buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1 + buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset VMEM_MODIFIERS offen:1 // s_waitcnt vmcnt(0) // v_add_u32 v2, vcc[0:1], v2, v3 v_add_u32 v2, v2, v3 v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size +#if ASIC_FAMILY >= CHIP_GC_9_5_0 + s_mov_b64 exec, vcc +#endif s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR // restore rsrc3 @@ -748,8 +777,13 @@ L_RESTORE: L_RESTORE_LDS_LOOP: buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW - s_add_u32 m0, m0, 256*2 // 128 DW - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW +#if ASIC_FAMILY >= CHIP_GC_9_5_0 + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:512 // third 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:768 // fourth 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:1024 // fifth 64DW +#endif + s_add_u32 m0, m0, LDS_RESTORE_GRANULARITY_BYTES // 128/320 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, LDS_RESTORE_GRANULARITY_BYTES //mem offset increased by 128/320 DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
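The restore hunk above is where the LDS restore granularity changes: before GC 9.5.0 each loop iteration issues two 256-byte buffer loads (512 bytes, 128 DW), while GC 9.5.0 issues five (1280 bytes, 320 DW), matching the two LDS_RESTORE_GRANULARITY_BYTES values. As a rough illustration of that arithmetic only (a minimal userspace C sketch, not kernel or trap-handler code; the 64 KiB LDS size is just an assumed example):

#include <stdio.h>

#define DWORDS_PER_LOAD  64                       /* one buffer_load_dword ... lds:1 covers 64 DW */
#define BYTES_PER_LOAD   (DWORDS_PER_LOAD * 4)    /* 256 bytes per load */

/* Number of restore-loop iterations needed to cover an LDS allocation. */
static unsigned int lds_restore_iterations(unsigned int lds_bytes, int is_gc_9_5_0)
{
	/* 2 loads (512 B / 128 DW) per iteration before GC 9.5.0, 5 loads (1280 B / 320 DW) on GC 9.5.0 */
	unsigned int granularity = (is_gc_9_5_0 ? 5 : 2) * BYTES_PER_LOAD;
	unsigned int iterations = 0, offset;

	for (offset = 0; offset < lds_bytes; offset += granularity)
		iterations++;

	return iterations;
}

int main(void)
{
	unsigned int lds_bytes = 64 * 1024;   /* assumed example allocation */

	printf("pre-GC-9.5.0: %u iterations\n", lds_restore_iterations(lds_bytes, 0)); /* 128 */
	printf("GC 9.5.0:     %u iterations\n", lds_restore_iterations(lds_bytes, 1)); /* 52 */
	return 0;
}

The larger per-iteration footprint on GC 9.5.0 simply reduces the number of loop trips for the same allocation; the save path is adjusted analogously (the SQC fallback path above saves in multiples of 256 bytes).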
@@ -979,17 +1013,17 @@ L_TCP_STORE_CHECK_DONE: end function write_4vgprs_to_mem(s_rsrc, s_mem_offset) - buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 - buffer_store_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256 - buffer_store_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2 - buffer_store_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3 + buffer_store_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS + buffer_store_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256 + buffer_store_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2 + buffer_store_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3 end function read_4vgprs_from_mem(s_rsrc, s_mem_offset) - buffer_load_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 - buffer_load_dword v1, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256 - buffer_load_dword v2, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*2 - buffer_load_dword v3, v0, s_rsrc, s_mem_offset slc:1 glc:1 offset:256*3 + buffer_load_dword v0, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS + buffer_load_dword v1, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256 + buffer_load_dword v2, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*2 + buffer_load_dword v3, v0, s_rsrc, s_mem_offset VMEM_MODIFIERS offset:256*3 s_waitcnt vmcnt(0) end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 7b826a136ceb..693469c18c60 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -1423,6 +1423,7 @@ err: static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, + bool cache_line_size_missing, struct kfd_gpu_cache_info *pcache_info) { struct amdgpu_device *adev = kdev->adev; @@ -1437,6 +1438,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2; pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* Scalar L1 Instruction Cache per SQC */ @@ -1449,6 +1452,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* Scalar L1 Data Cache per SQC */ @@ -1460,6 +1465,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2; pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 64; i++; } /* GL1 Data Cache per SA */ @@ -1472,7 +1479,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; - pcache_info[i].cache_line_size = 0; + if (cache_line_size_missing) + pcache_info[i].cache_line_size = 128; i++; } /* L2 Data Cache per GPU (Total Tex Cache) */ @@ -1484,6 +1492,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_SIMD_CACHE); 
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size; + if (cache_line_size_missing && !pcache_info[i].cache_line_size) + pcache_info[i].cache_line_size = 128; i++; } /* L3 Data Cache per GPU */ @@ -1494,7 +1504,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev, CRAT_CACHE_FLAGS_DATA_CACHE | CRAT_CACHE_FLAGS_SIMD_CACHE); pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh; - pcache_info[i].cache_line_size = 0; + pcache_info[i].cache_line_size = 64; i++; } return i; @@ -1569,6 +1579,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config_v2(struct kfd_dev *kdev, int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pcache_info) { int num_of_cache_types = 0; + bool cache_line_size_missing = false; switch (kdev->adev->asic_type) { case CHIP_KAVERI: @@ -1628,6 +1639,7 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): num_of_cache_types = kfd_fill_gpu_cache_info_from_gfx_config_v2(kdev->kfd, *pcache_info); @@ -1692,10 +1704,17 @@ int kfd_get_gpu_cache_info(struct kfd_node *kdev, struct kfd_gpu_cache_info **pc case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + /* Cacheline size not available in IP discovery for gc11. + * kfd_fill_gpu_cache_info_from_gfx_config to hard code it + */ + cache_line_size_missing = true; + fallthrough; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): num_of_cache_types = - kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, *pcache_info); + kfd_fill_gpu_cache_info_from_gfx_config(kdev->kfd, + cache_line_size_missing, + *pcache_info); break; default: *pcache_info = dummy_cache_info; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h index 924d0fd85dfb..27aa1a5b120f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h @@ -79,6 +79,7 @@ static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_node *dev) return (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0) || KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 9b51dd75fefc..a29374c86405 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -85,6 +85,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) case IP_VERSION(4, 4, 0):/* ALDEBARAN */ case IP_VERSION(4, 4, 2): case IP_VERSION(4, 4, 5): + case IP_VERSION(4, 4, 4): case IP_VERSION(5, 0, 0):/* NAVI10 */ case IP_VERSION(5, 0, 1):/* CYAN_SKILLFISH */ case IP_VERSION(5, 0, 2):/* NAVI14 */ @@ -152,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd) break; case IP_VERSION(9, 4, 3): /* GC 9.4.3 */ case IP_VERSION(9, 4, 4): /* GC 9.4.4 */ + case IP_VERSION(9, 5, 0): /* GC 9.5.0 */ kfd->device_info.event_interrupt_class = &event_interrupt_class_v9_4_3; break; @@ -356,6 +358,10 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) gfx_target_version = 90402; f2g = &gc_9_4_3_kfd2kgd; break; + case IP_VERSION(9, 5, 0): + gfx_target_version = 90500; + f2g = &gc_9_4_3_kfd2kgd; + break; /* Navi10 */ case IP_VERSION(10, 1, 10): gfx_target_version = 100100; @@ 
-515,6 +521,10 @@ static void kfd_cwsr_init(struct kfd_dev *kfd) > KFD_CWSR_TMA_OFFSET); kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex); + } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_5_0_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex); } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > KFD_CWSR_TMA_OFFSET); @@ -567,6 +577,7 @@ static int kfd_gws_init(struct kfd_node *node) && kfd->mec2_fw_version >= 0x28) || (KFD_GC_VERSION(node) == IP_VERSION(9, 4, 3) || KFD_GC_VERSION(node) == IP_VERSION(9, 4, 4)) || + (KFD_GC_VERSION(node) == IP_VERSION(9, 5, 0)) || (KFD_GC_VERSION(node) >= IP_VERSION(10, 3, 0) && KFD_GC_VERSION(node) < IP_VERSION(11, 0, 0) && kfd->mec2_fw_version >= 0x6b) || @@ -638,6 +649,14 @@ static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes) struct kfd_node *knode; unsigned int i; + /* + * flush_work ensures that there are no outstanding + * work-queue items that will access interrupt_ring. New work items + * can't be created because we stopped interrupt handling above. + */ + flush_workqueue(kfd->ih_wq); + destroy_workqueue(kfd->ih_wq); + for (i = 0; i < num_nodes; i++) { knode = kfd->nodes[i]; device_queue_manager_uninit(knode->dqm); @@ -733,14 +752,14 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1; - /* For GFX9.4.3, we need special handling for VMIDs depending on - * partition mode. + /* For multi-partition capable GPUs, we need special handling for VMIDs + * depending on partition mode. * In CPX mode, the VMID range needs to be shared between XCDs. * Additionally, there are 13 VMIDs (3-15) available for KFD. To * divide them equally, we change starting VMID to 4 and not use * VMID 3. - * If the VMID range changes for GFX9.4.3, then this code MUST be - * revisited. + * If the VMID range changes for multi-partition capable GPUs, then + * this code MUST be revisited. */ if (kfd->adev->xcp_mgr) { partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr, @@ -805,14 +824,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; /* - * For GFX9.4.3, the KFD abstracts all partitions within a socket as - * xGMI connected in the topology so assign a unique hive id per - * device based on the pci device location if device is in PCIe mode. + * For multi-partition capable GPUs, the KFD abstracts all partitions + * within a socket as xGMI connected in the topology so assign a unique + * hive id per device based on the pci device location if device is in + * PCIe mode. */ - if (!kfd->hive_id && - (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) && - kfd->num_nodes > 1) + if (!kfd->hive_id && kfd->num_nodes > 1) kfd->hive_id = pci_dev_id(kfd->adev->pdev); kfd->noretry = kfd->adev->gmc.noretry; @@ -850,12 +867,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20); } - if ((KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) && - partition_mode == AMDGPU_CPX_PARTITION_MODE && + if (partition_mode == AMDGPU_CPX_PARTITION_MODE && kfd->num_nodes != 1) { - /* For GFX9.4.3 and CPX mode, first XCD gets VMID range - * 4-9 and second XCD gets VMID range 10-15. 
+ /* For multi-partition capable GPUs and CPX mode, first + * XCD gets VMID range 4-9 and second XCD gets VMID + * range 10-15. */ node->vm_info.first_vmid_kfd = (i%2 == 0) ? @@ -879,8 +895,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, amdgpu_amdkfd_get_local_mem_info(kfd->adev, &node->local_mem_info, node->xcp); - if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) + if (kfd->adev->xcp_mgr) kfd_setup_interrupt_bitmap(node, i); /* Initialize the KFD node */ @@ -1059,21 +1074,6 @@ static int kfd_resume(struct kfd_node *node) return err; } -static inline void kfd_queue_work(struct workqueue_struct *wq, - struct work_struct *work) -{ - int cpu, new_cpu; - - cpu = new_cpu = smp_processor_id(); - do { - new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids; - if (cpu_to_node(new_cpu) == numa_node_id()) - break; - } while (cpu != new_cpu); - - queue_work_on(new_cpu, wq, work); -} - /* This is called directly from KGD at ISR. */ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) { @@ -1099,7 +1099,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) patched_ihre, &is_patched) && enqueue_ih_ring_entry(node, is_patched ? patched_ihre : ih_ring_entry)) { - kfd_queue_work(node->ih_wq, &node->interrupt_work); + queue_work(node->kfd->ih_wq, &node->interrupt_work); spin_unlock_irqrestore(&node->interrupt_lock, flags); return; } @@ -1514,6 +1514,73 @@ bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) return kfd_compute_active(node); } +/** + * kgd2kfd_vmfault_fast_path() - KFD vm page fault interrupt handling fast path for gmc v9 + * @adev: amdgpu device + * @entry: vm fault interrupt vector + * @retry_fault: whether this is a retry fault + * + * retry fault - + * with CAM enabled, adev primary ring + * | gmc_v9_0_process_interrupt() + * adev soft_ring + * | gmc_v9_0_process_interrupt() worker failed to recover page fault + * KFD node ih_fifo + * | KFD interrupt_wq worker + * kfd_signal_vm_fault_event + * + * without CAM, adev primary ring1 + * | gmc_v9_0_process_interrupt worker failed to recover page fault + * KFD node ih_fifo + * | KFD interrupt_wq worker + * kfd_signal_vm_fault_event + * + * no-retry fault - + * adev primary ring + * | gmc_v9_0_process_interrupt() + * KFD node ih_fifo + * | KFD interrupt_wq worker + * kfd_signal_vm_fault_event + * + * fast path - After kfd_signal_vm_fault_event, gmc_v9_0_process_interrupt drops page faults + * from the same process and does not copy the interrupt to the KFD node ih_fifo. + * With the gdb debugger enabled, the retry fault must be converted to a no-retry fault for + * the debugger, so the fast path cannot be used.
+ * + * Return: + * true - use the fast path to handle this fault + * false - use the normal path to handle it + */ +bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, + bool retry_fault) +{ + struct kfd_process *p; + u32 cam_index; + + if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) { + p = kfd_lookup_process_by_pasid(entry->pasid); + if (!p) + return true; + + if (p->gpu_page_fault && !p->debug_trap_enabled) { + if (retry_fault && adev->irq.retry_cam_enabled) { + cam_index = entry->src_data[2] & 0x3ff; + WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); + } + + kfd_unref_process(p); + return true; + } + + /* + * This is the first page fault: set the flag and then signal user space + */ + p->gpu_page_fault = true; + kfd_unref_process(p); + } + return false; +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c79fe9069e22..1405e8affd48 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -207,6 +207,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, if (!down_read_trylock(&adev->reset_domain->sem)) return -EIO; + if (!pdd->proc_ctx_cpu_ptr) { + r = amdgpu_amdkfd_alloc_gtt_mem(adev, + AMDGPU_MES_PROC_CTX_SIZE, + &pdd->proc_ctx_bo, + &pdd->proc_ctx_gpu_addr, + &pdd->proc_ctx_cpu_ptr, + false); + if (r) { + dev_err(adev->dev, + "failed to allocate process context bo\n"); + return r; + } + memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); + } + memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); queue_input.process_id = qpd->pqm->process->pasid; queue_input.page_table_base_addr = qpd->page_table_base; @@ -2373,6 +2388,9 @@ static int wait_on_destroy_queue(struct device_queue_manager *dqm, q->process); int ret = 0; + if (WARN_ON(!pdd)) + return ret; + if (pdd->qpd.is_debug) return ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 210bcc048f4c..67137e674f1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -64,7 +64,8 @@ static int update_qpd_v9(struct device_queue_manager *dqm, qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; if (KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dqm->dev->kfd) == IP_VERSION(9, 5, 0)) qpd->sh_mem_config |= (1 << SH_MEM_CONFIG__F8_MODE__SHIFT); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index ea3792249209..d075f24e5f9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -748,6 +748,16 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, uint64_t *slots = page_slots(p->signal_page); uint32_t id; + /* + * If the id is valid but the slot is not signaled, the GPU may have signaled the same + * event twice before the driver had a chance to process the first interrupt; the signal + * slot is auto-reset once set_event wakes up user space, so just drop the second event as + * the application only needs to be woken once.
+ */ + if ((valid_id_bits > 31 || (1U << valid_id_bits) >= KFD_SIGNAL_EVENT_LIMIT) && + partial_id < KFD_SIGNAL_EVENT_LIMIT && slots[partial_id] == UNSIGNALED_EVENT_SLOT) + goto out_unlock; + if (valid_id_bits) pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", partial_id, valid_id_bits); @@ -776,6 +786,7 @@ void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, } } +out_unlock: rcu_read_unlock(); kfd_unref_process(p); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index d46a13156ee9..0cb5c582ce7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -184,6 +184,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; } + amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__GFX); break; case SOC15_IH_CLIENTID_VMC: case SOC15_IH_CLIENTID_VMC1: @@ -213,6 +214,7 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev, } else { reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET; } + amdgpu_ras_set_err_poison(dev->adev, AMDGPU_RAS_BLOCK__SDMA); break; default: dev_warn(dev->adev->dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 9b6b6e882593..783c2f5a04e4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -46,7 +46,7 @@ #include <linux/kfifo.h> #include "kfd_priv.h" -#define KFD_IH_NUM_ENTRIES 8192 +#define KFD_IH_NUM_ENTRIES 16384 static void interrupt_wq(struct work_struct *); @@ -62,11 +62,14 @@ int kfd_interrupt_init(struct kfd_node *node) return r; } - node->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1); - if (unlikely(!node->ih_wq)) { - kfifo_free(&node->ih_fifo); - dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); - return -ENOMEM; + if (!node->kfd->ih_wq) { + node->kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI | WQ_UNBOUND, + node->kfd->num_nodes); + if (unlikely(!node->kfd->ih_wq)) { + kfifo_free(&node->ih_fifo); + dev_err(node->adev->dev, "Failed to allocate KFD IH workqueue\n"); + return -ENOMEM; + } } spin_lock_init(&node->interrupt_lock); @@ -96,16 +99,6 @@ void kfd_interrupt_exit(struct kfd_node *node) spin_lock_irqsave(&node->interrupt_lock, flags); node->interrupts_active = false; spin_unlock_irqrestore(&node->interrupt_lock, flags); - - /* - * flush_work ensures that there are no outstanding - * work-queue items that will access interrupt_ring. New work items - * can't be created because we stopped interrupt handling above. - */ - flush_workqueue(node->ih_wq); - - destroy_workqueue(node->ih_wq); - kfifo_free(&node->ih_fifo); } @@ -114,55 +107,48 @@ void kfd_interrupt_exit(struct kfd_node *node) */ bool enqueue_ih_ring_entry(struct kfd_node *node, const void *ih_ring_entry) { - int count; - - count = kfifo_in(&node->ih_fifo, ih_ring_entry, - node->kfd->device_info.ih_ring_entry_size); - if (count != node->kfd->device_info.ih_ring_entry_size) { - dev_dbg_ratelimited(node->adev->dev, - "Interrupt ring overflow, dropping interrupt %d\n", - count); + if (kfifo_is_full(&node->ih_fifo)) { + dev_warn_ratelimited(node->adev->dev, "KFD node %d ih_fifo overflow\n", + node->node_id); return false; } + kfifo_in(&node->ih_fifo, ih_ring_entry, node->kfd->device_info.ih_ring_entry_size); return true; } /* * Assumption: single reader/writer. 
This function is not re-entrant */ -static bool dequeue_ih_ring_entry(struct kfd_node *node, void *ih_ring_entry) +static bool dequeue_ih_ring_entry(struct kfd_node *node, u32 **ih_ring_entry) { int count; - count = kfifo_out(&node->ih_fifo, ih_ring_entry, - node->kfd->device_info.ih_ring_entry_size); - - WARN_ON(count && count != node->kfd->device_info.ih_ring_entry_size); + if (kfifo_is_empty(&node->ih_fifo)) + return false; + count = kfifo_out_linear_ptr(&node->ih_fifo, ih_ring_entry, + node->kfd->device_info.ih_ring_entry_size); + WARN_ON(count != node->kfd->device_info.ih_ring_entry_size); return count == node->kfd->device_info.ih_ring_entry_size; } static void interrupt_wq(struct work_struct *work) { - struct kfd_node *dev = container_of(work, struct kfd_node, - interrupt_work); - uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; + struct kfd_node *dev = container_of(work, struct kfd_node, interrupt_work); + uint32_t *ih_ring_entry; unsigned long start_jiffies = jiffies; - if (dev->kfd->device_info.ih_ring_entry_size > sizeof(ih_ring_entry)) { - dev_err_once(dev->adev->dev, "Ring entry too small\n"); - return; - } - - while (dequeue_ih_ring_entry(dev, ih_ring_entry)) { + while (dequeue_ih_ring_entry(dev, &ih_ring_entry)) { dev->kfd->device_info.event_interrupt_class->interrupt_wq(dev, ih_ring_entry); + kfifo_skip_count(&dev->ih_fifo, dev->kfd->device_info.ih_ring_entry_size); + if (time_is_before_jiffies(start_jiffies + HZ)) { /* If we spent more than a second processing signals, * reschedule the worker to avoid soft-lockup warnings */ - queue_work(dev->ih_wq, &dev->interrupt_work); + queue_work(dev->kfd->ih_wq, &dev->interrupt_work); break; } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index eacfeb32f35d..4b275937d05e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -306,7 +306,7 @@ svm_migrate_copy_to_vram(struct kfd_node *node, struct svm_range *prange, spage = migrate_pfn_to_page(migrate->src[i]); if (spage && !is_zone_device_page(spage)) { src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, - DMA_TO_DEVICE); + DMA_BIDIRECTIONAL); r = dma_mapping_error(dev, src[i]); if (r) { dev_err(dev, "%s: fail %d dma_map_page\n", @@ -629,7 +629,7 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, goto out_oom; } - dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_BIDIRECTIONAL); r = dma_mapping_error(dev, dst[i]); if (r) { dev_err(adev->dev, "%s: fail %d dma_map_page\n", __func__, r); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 84e8ea3a8a0c..ff417d5361c4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -78,7 +78,8 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se2 = se_mask[2]; m->compute_static_thread_mgmt_se3 = se_mask[3]; if (KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) && - KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) { + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) { m->compute_static_thread_mgmt_se4 = se_mask[4]; m->compute_static_thread_mgmt_se5 = se_mask[5]; m->compute_static_thread_mgmt_se6 = se_mask[6]; @@ -301,7 +302,8 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_ctx_save_control = 0; if 
(KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 3) && - KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(mm->dev) != IP_VERSION(9, 5, 0)) update_cu_mask(mm, mqd, minfo, 0); set_priority(m, q); @@ -885,7 +887,8 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->debugfs_show_mqd = debugfs_show_mqd; #endif if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) { + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) { mqd->init_mqd = init_mqd_v9_4_3; mqd->load_mqd = load_mqd_v9_4_3; mqd->update_mqd = update_mqd_v9_4_3; @@ -909,8 +912,10 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif + mqd->check_preemption_failed = check_preemption_failed; if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) { + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) { mqd->init_mqd = init_mqd_hiq_v9_4_3; mqd->load_mqd = hiq_load_mqd_kiq_v9_4_3; mqd->destroy_mqd = destroy_hiq_mqd_v9_4_3; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 37930629edc5..4984b41cd372 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -28,6 +28,10 @@ #include "kfd_kernel_queue.h" #include "kfd_priv.h" +#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0) +#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1) +#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2) + static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) { @@ -40,7 +44,7 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, - bool *over_subscription) + int *over_subscription) { unsigned int process_count, queue_count, compute_queue_count, gws_queue_count; unsigned int map_queue_size; @@ -58,17 +62,20 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * hws_max_conc_proc has been done in * kgd2kfd_device_init(). 
*/ - *over_subscription = false; + *over_subscription = 0; if (node->max_proc_per_quantum > 1) max_proc_per_quantum = node->max_proc_per_quantum; - if ((process_count > max_proc_per_quantum) || - compute_queue_count > get_cp_queues_num(pm->dqm) || - gws_queue_count > 1) { - *over_subscription = true; + if (process_count > max_proc_per_quantum) + *over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT; + if (compute_queue_count > get_cp_queues_num(pm->dqm)) + *over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT; + if (gws_queue_count > 1) + *over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT; + + if (*over_subscription) dev_dbg(dev, "Over subscribed runlist\n"); - } map_queue_size = pm->pmf->map_queues_size; /* calculate run list ib allocation size */ @@ -89,7 +96,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, unsigned int **rl_buffer, uint64_t *rl_gpu_buffer, unsigned int *rl_buffer_size, - bool *is_over_subscription) + int *is_over_subscription) { struct kfd_node *node = pm->dqm->dev; struct device *dev = node->adev->dev; @@ -134,7 +141,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, struct qcm_process_device *qpd; struct queue *q; struct kernel_queue *kq; - bool is_over_subscription; + int is_over_subscription; rl_wptr = retval = processes_mapped = 0; @@ -213,15 +220,20 @@ static int pm_create_runlist_ib(struct packet_manager *pm, if (is_over_subscription) { if (!pm->is_over_subscription) - dev_warn( - dev, - "Runlist is getting oversubscribed. Expect reduced ROCm performance.\n"); + dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s. Expect reduced ROCm performance.\n", + is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ? + " too many processes." : "", + is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ? + " too many queues." : "", + is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ? + " multiple processes using cooperative launch." 
: ""); + retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true); } - pm->is_over_subscription = is_over_subscription; + pm->is_over_subscription = !!is_over_subscription; for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) pr_debug("0x%2X ", rl_buffer[i]); @@ -248,7 +260,8 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) default: if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) || KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0)) pm->pmf = &kfd_aldebaran_pm_funcs; else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1)) pm->pmf = &kfd_v9_pm_funcs; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9e5ca0b93b2a..c32b255c0eb2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -207,7 +207,8 @@ enum cache_policy { #define KFD_SUPPORT_XNACK_PER_PROCESS(dev)\ ((KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2)) || \ (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3)) || \ - (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4))) + (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) || \ + (KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))) struct kfd_node; @@ -273,7 +274,6 @@ struct kfd_node { /* Interrupts */ struct kfifo ih_fifo; - struct workqueue_struct *ih_wq; struct work_struct interrupt_work; spinlock_t interrupt_lock; @@ -366,6 +366,8 @@ struct kfd_dev { struct kfd_node *nodes[MAX_KFD_NODES]; unsigned int num_nodes; + struct workqueue_struct *ih_wq; + /* Kernel doorbells for KFD device */ struct amdgpu_bo *doorbells; @@ -1002,6 +1004,9 @@ struct kfd_process { struct semaphore runtime_enable_sema; bool is_runtime_retry; struct kfd_runtime_info runtime_info; + + /* if gpu page fault sent to KFD */ + bool gpu_page_fault; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ @@ -1150,7 +1155,8 @@ static inline struct kfd_node *kfd_node_by_irq_ids(struct amdgpu_device *adev, uint32_t i; if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && - KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) return dev->nodes[0]; for (i = 0; i < dev->num_nodes; i++) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 87cd52cf4ee9..0976b5b0e8e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1076,7 +1076,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) kfd_free_process_doorbells(pdd->dev->kfd, pdd); - if (pdd->dev->kfd->shared_resources.enable_mes) + if (pdd->dev->kfd->shared_resources.enable_mes && + pdd->proc_ctx_cpu_ptr) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, &pdd->proc_ctx_bo); /* @@ -1608,7 +1609,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, struct kfd_process *p) { struct kfd_process_device *pdd = NULL; - int retval = 0; if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) return NULL; @@ -1632,21 +1632,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0); - if (dev->kfd->shared_resources.enable_mes) { - retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, - AMDGPU_MES_PROC_CTX_SIZE, - &pdd->proc_ctx_bo, - &pdd->proc_ctx_gpu_addr, - &pdd->proc_ctx_cpu_ptr, - 
false); - if (retval) { - dev_err(dev->adev->dev, - "failed to allocate process context bo\n"); - goto err_free_pdd; - } - memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE); - } - p->pdds[p->n_pdds++] = pdd; if (kfd_dbg_is_per_vmid_supported(pdd->dev)) pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( @@ -1658,10 +1643,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, idr_init(&pdd->alloc_idr); return pdd; - -err_free_pdd: - kfree(pdd); - return NULL; } /** @@ -2146,10 +2127,11 @@ int kfd_process_drain_interrupts(struct kfd_process_device *pdd) irq_drain_fence[3] = pdd->process->pasid; /* - * For GFX 9.4.3, send the NodeId also in IH cookie DW[3] + * For GFX 9.4.3/9.5.0, send the NodeId also in IH cookie DW[3] */ if (KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4)) { + KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(pdd->dev->kfd) == IP_VERSION(9, 5, 0)) { node_id = ffs(pdd->dev->interrupt_bitmap) - 1; irq_drain_fence[3] |= node_id << 16; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c76db22a1000..9df56f8e09f9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -131,8 +131,9 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, if (!gws && pdd->qpd.num_gws == 0) return -EINVAL; - if (KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && - KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && + if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) && !dev->kfd->shared_resources.enable_mes) { if (gws) ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, @@ -197,6 +198,7 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, if (pqn->q->gws) { if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) && KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) && + KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) && !dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_remove_gws_from_process( pqm->process->kgd_process_info, pqn->q->gws); @@ -212,13 +214,17 @@ static void pqm_clean_queue_resource(struct process_queue_manager *pqm, void pqm_uninit(struct process_queue_manager *pqm) { struct process_queue_node *pqn, *next; - struct kfd_process_device *pdd; list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { if (pqn->q) { - pdd = kfd_get_process_device_data(pqn->q->device, pqm->process); - kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); - kfd_queue_release_buffers(pdd, &pqn->q->properties); + struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device, + pqm->process); + if (pdd) { + kfd_queue_unref_bo_vas(pdd, &pqn->q->properties); + kfd_queue_release_buffers(pdd, &pqn->q->properties); + } else { + WARN_ON(!pdd); + } pqm_clean_queue_resource(pqm, pqn); } @@ -316,11 +322,12 @@ int pqm_create_queue(struct process_queue_manager *pqm, unsigned int max_queues = 127; /* HWS limit */ /* - * On GFX 9.4.3, increase the number of queues that - * can be created to 255. No HWS limit on GFX 9.4.3. + * On GFX 9.4.3/9.5.0, increase the number of queues that + * can be created to 255. No HWS limit on GFX 9.4.3/9.5.0. 
*/ if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0)) max_queues = 255; q = NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index ad29634f8b44..ecccd7adbab4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -394,7 +394,8 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) if ((gfxv / 100 * 100) == 90400 || /* GFX_VERSION_AQUA_VANJARAM */ gfxv == 90010 || /* GFX_VERSION_ALDEBARAN */ - gfxv == 90008) /* GFX_VERSION_ARCTURUS */ + gfxv == 90008 || /* GFX_VERSION_ARCTURUS */ + gfxv == 90500) vgpr_size = 0x80000; else if (gfxv == 110000 || /* GFX_VERSION_PLUM_BONITO */ gfxv == 110001 || /* GFX_VERSION_WHEAT_NAS */ @@ -405,9 +406,10 @@ static u32 kfd_get_vgpr_size_per_cu(u32 gfxv) return vgpr_size; } -#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv) \ +#define WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props) \ (kfd_get_vgpr_size_per_cu(gfxv) + SGPR_SIZE_PER_CU +\ - LDS_SIZE_PER_CU + HWREG_SIZE_PER_CU) + (((gfxv) == 90500) ? (props->lds_size_in_kb << 10) : LDS_SIZE_PER_CU) +\ + HWREG_SIZE_PER_CU) #define CNTL_STACK_BYTES_PER_WAVE(gfxv) \ ((gfxv) >= 100100 ? 12 : 8) /* GFX_VERSION_NAVI10*/ @@ -431,7 +433,7 @@ void kfd_queue_ctx_save_restore_size(struct kfd_topology_device *dev) min(cu_num * 40, props->array_count / props->simd_arrays_per_engine * 512) : cu_num * 32; - wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv), PAGE_SIZE); + wg_data_size = ALIGN(cu_num * WG_CONTEXT_DATA_SIZE_PER_CU(gfxv, props), PAGE_SIZE); ctl_stack_size = wave_num * CNTL_STACK_BYTES_PER_WAVE(gfxv) + 8; ctl_stack_size = ALIGN(SIZEOF_HSA_USER_CONTEXT_SAVE_AREA_HEADER + ctl_stack_size, PAGE_SIZE); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3e2911895c74..bd3e20d981e0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1195,6 +1195,7 @@ svm_range_get_pte_flags(struct kfd_node *node, struct kfd_node *bo_node; uint32_t flags = prange->flags; uint32_t mapping_flags = 0; + uint32_t gc_ip_version = KFD_GC_VERSION(node); uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT); @@ -1204,7 +1205,7 @@ svm_range_get_pte_flags(struct kfd_node *node, if (domain == SVM_RANGE_VRAM_DOMAIN) bo_node = prange->svm_bo->node; - switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) { + switch (gc_ip_version) { case IP_VERSION(9, 4, 1): if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_node == node) { @@ -1241,8 +1242,10 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): if (ext_coherent) - mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC; + mtype_local = (gc_ip_version < IP_VERSION(9, 5, 0) && !node->adev->rev_id) ? + AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_CC; else mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : amdgpu_mtype_local == 2 ? 
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1257,9 +1260,13 @@ svm_range_get_pte_flags(struct kfd_node *node, */ else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_NC; - /* PCIe P2P or extended system scope coherence */ - else + /* PCIe P2P on GPUs pre-9.5.0 */ + else if (gc_ip_version < IP_VERSION(9, 5, 0) && + !svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_UC; + /* Other remote memory */ + else + mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the APU */ } else if (node->adev->flags & AMD_IS_APU) { /* On NUMA systems, locality is determined per-page @@ -1271,7 +1278,10 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { - mapping_flags |= AMDGPU_VM_MTYPE_UC; + if (gc_ip_version < IP_VERSION(9, 5, 0)) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; } break; case IP_VERSION(12, 0, 0): @@ -1299,7 +1309,7 @@ svm_range_get_pte_flags(struct kfd_node *node, pte_flags = AMDGPU_PTE_VALID; pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM; pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; - if (KFD_GC_VERSION(node) >= IP_VERSION(12, 0, 0)) + if (gc_ip_version >= IP_VERSION(12, 0, 0)) pte_flags |= AMDGPU_PTE_IS_PTE; pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 9476e30d6baa..ceb9fb475ef1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1714,7 +1714,8 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext, pcache->cacheline_size = pcache_info[cache_type].cache_line_size; if (KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 3) || - KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4)) + KFD_GC_VERSION(knode) == IP_VERSION(9, 4, 4) || + KFD_GC_VERSION(knode) == IP_VERSION(9, 5, 0)) mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); else mode = UNKNOWN_MEMORY_PARTITION_MODE; @@ -1776,7 +1777,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info; struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config; int gpu_processor_id; - struct kfd_cache_properties *props_ext; + struct kfd_cache_properties *props_ext = NULL; int num_of_entries = 0; int num_of_cache_types = 0; struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES]; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 48be917e7bc5..b631b2eba0f0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -93,6 +93,7 @@ #include <drm/drm_fourcc.h> #include <drm/drm_edid.h> #include <drm/drm_eld.h> +#include <drm/drm_utils.h> #include <drm/drm_vblank.h> #include <drm/drm_audio_component.h> #include <drm/drm_gem_atomic_helper.h> @@ -955,13 +956,13 @@ static void dm_dmub_outbox1_low_irq(void *interrupt_params) } } -static int dm_set_clockgating_state(void *handle, +static int dm_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dm_set_powergating_state(void *handle, +static int dm_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 
0; @@ -1036,8 +1037,10 @@ static int amdgpu_dm_audio_component_get_eld(struct device *kdev, int port, continue; *enabled = true; + mutex_lock(&connector->eld_mutex); ret = drm_eld_size(connector->eld); memcpy(buf, connector->eld, min(max_bytes, ret)); + mutex_unlock(&connector->eld_mutex); break; } @@ -2152,8 +2155,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) } #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - adev->dm.secure_display_ctxs = amdgpu_dm_crtc_secure_display_create_contexts(adev); - if (!adev->dm.secure_display_ctxs) + amdgpu_dm_crtc_secure_display_create_contexts(adev); + if (!adev->dm.secure_display_ctx.crtc_ctx) DRM_ERROR("amdgpu: failed to initialize secure display contexts.\n"); #endif @@ -2197,15 +2200,15 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) amdgpu_dm_destroy_drm_device(&adev->dm); #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) - if (adev->dm.secure_display_ctxs) { + if (adev->dm.secure_display_ctx.crtc_ctx) { for (i = 0; i < adev->mode_info.num_crtc; i++) { - if (adev->dm.secure_display_ctxs[i].crtc) { - flush_work(&adev->dm.secure_display_ctxs[i].notify_ta_work); - flush_work(&adev->dm.secure_display_ctxs[i].forward_roi_work); + if (adev->dm.secure_display_ctx.crtc_ctx[i].crtc) { + flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].notify_ta_work); + flush_work(&adev->dm.secure_display_ctx.crtc_ctx[i].forward_roi_work); } } - kfree(adev->dm.secure_display_ctxs); - adev->dm.secure_display_ctxs = NULL; + kfree(adev->dm.secure_display_ctx.crtc_ctx); + adev->dm.secure_display_ctx.crtc_ctx = NULL; } #endif if (adev->dm.hdcp_workqueue) { @@ -2338,7 +2341,8 @@ static int load_dmcu_fw(struct amdgpu_device *adev) return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, "%s", fw_name_dmcu); + r = amdgpu_ucode_request(adev, &adev->dm.fw_dmcu, AMDGPU_UCODE_REQUIRED, + "%s", fw_name_dmcu); if (r == -ENODEV) { /* DMCU firmware is not necessary, so don't raise a fuss if it's missing */ DRM_DEBUG_KMS("dm: DMCU firmware not found\n"); @@ -3457,6 +3461,7 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) struct drm_connector *conn_base; struct amdgpu_device *adev; struct drm_luminance_range_info *luminance_range; + int min_input_signal_override; if (aconnector->bl_idx == -1 || aconnector->dc_link->connector_signal != SIGNAL_TYPE_EDP) @@ -3493,6 +3498,10 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps->aux_min_input_signal = 0; caps->aux_max_input_signal = 512; } + + min_input_signal_override = drm_get_panel_min_brightness_quirk(aconnector->drm_edid); + if (min_input_signal_override >= 0) + caps->min_input_signal = min_input_signal_override; } void amdgpu_dm_update_connector_after_detect( @@ -5306,7 +5315,8 @@ static int dm_init_microcode(struct amdgpu_device *adev) /* ASIC doesn't support DMUB. 
*/ return 0; } - r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, "%s", fw_name_dmub); + r = amdgpu_ucode_request(adev, &adev->dm.dmub_fw, AMDGPU_UCODE_REQUIRED, + "%s", fw_name_dmub); return r; } @@ -5522,8 +5532,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, const u64 tiling_flags, struct dc_plane_info *plane_info, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) + bool tmz_surface) { const struct drm_framebuffer *fb = plane_state->fb; const struct amdgpu_framebuffer *afb = @@ -5622,7 +5631,7 @@ fill_dc_plane_info_and_addr(struct amdgpu_device *adev, &plane_info->tiling_info, &plane_info->plane_size, &plane_info->dcc, address, - tmz_surface, force_disable_dcc); + tmz_surface); if (ret) return ret; @@ -5643,7 +5652,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, struct dc_scaling_info scaling_info; struct dc_plane_info plane_info; int ret; - bool force_disable_dcc = false; ret = amdgpu_dm_plane_fill_dc_scaling_info(adev, plane_state, &scaling_info); if (ret) @@ -5654,13 +5662,11 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->clip_rect = scaling_info.clip_rect; dc_plane_state->scaling_quality = scaling_info.scaling_quality; - force_disable_dcc = adev->asic_type == CHIP_RAVEN && adev->in_suspend; ret = fill_dc_plane_info_and_addr(adev, plane_state, afb->tiling_flags, &plane_info, &dc_plane_state->address, - afb->tmz_surface, - force_disable_dcc); + afb->tmz_surface); if (ret) return ret; @@ -9097,7 +9103,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, afb->tiling_flags, &bundle->plane_infos[planes_count], &bundle->flip_addrs[planes_count].address, - afb->tmz_surface, false); + afb->tmz_surface); drm_dbg_state(state->dev, "plane: id=%d dcc_en=%d\n", new_plane_state->plane->index, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 6464a8378387..e46e1365fe91 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -541,12 +541,12 @@ struct amdgpu_display_manager { #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) /** - * @secure_display_ctxs: + * @secure_display_ctx: * - * Store the ROI information and the work_struct to command dmub and psp for - * all crtcs. + * Store secure display relevant info. e.g. the ROI information + * , the work_struct to command dmub, etc. 
*/ - struct secure_display_context *secure_display_ctxs; + struct secure_display_context secure_display_ctx; #endif /** * @hpd_rx_offload_wq: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index f936a35fa9eb..6fdc306a4a86 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -83,12 +83,221 @@ const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc, } #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY +static void update_phy_id_mapping(struct amdgpu_device *adev) +{ + struct drm_device *ddev = adev_to_drm(adev); + struct amdgpu_display_manager *dm = &adev->dm; + struct drm_connector *connector; + struct amdgpu_dm_connector *aconnector; + struct amdgpu_dm_connector *sort_connector[AMDGPU_DM_MAX_CRTC] = {NULL}; + struct drm_connector_list_iter iter; + uint8_t idx = 0, idx_2 = 0, connector_cnt = 0; + + dm->secure_display_ctx.phy_mapping_updated = false; + + mutex_lock(&ddev->mode_config.mutex); + drm_connector_list_iter_begin(ddev, &iter); + drm_for_each_connector_iter(connector, &iter) { + + if (connector->status != connector_status_connected) + continue; + + if (idx >= AMDGPU_DM_MAX_CRTC) { + DRM_WARN("%s connected connectors exceed max crtc\n", __func__); + mutex_unlock(&ddev->mode_config.mutex); + return; + } + + aconnector = to_amdgpu_dm_connector(connector); + + sort_connector[idx] = aconnector; + idx++; + connector_cnt++; + } + drm_connector_list_iter_end(&iter); + + /* sort connectors by link_enc_hw_instance first */ + for (idx = connector_cnt; idx > 1 ; idx--) { + for (idx_2 = 0; idx_2 < (idx - 1); idx_2++) { + if (sort_connector[idx_2]->dc_link->link_enc_hw_inst > + sort_connector[idx_2 + 1]->dc_link->link_enc_hw_inst) + swap(sort_connector[idx_2], sort_connector[idx_2 + 1]); + } + } + + /* + * Sort mst connectors by RAD. mst connectors with the same enc_hw_instance are already + * sorted together above. + */ + for (idx = 0; idx < connector_cnt; /*Do nothing*/) { + if (sort_connector[idx]->mst_root) { + uint8_t i, j, k; + uint8_t mst_con_cnt = 1; + + for (idx_2 = (idx + 1); idx_2 < connector_cnt; idx_2++) { + if (sort_connector[idx_2]->mst_root == sort_connector[idx]->mst_root) + mst_con_cnt++; + else + break; + } + + for (i = mst_con_cnt; i > 1; i--) { + for (j = idx; j < (idx + i - 2); j++) { + int mstb_lct = sort_connector[j]->mst_output_port->parent->lct; + int next_mstb_lct = sort_connector[j + 1]->mst_output_port->parent->lct; + u8 *rad; + u8 *next_rad; + bool swap = false; + + /* Sort by mst tree depth first. Then compare RAD if depth is the same*/ + if (mstb_lct > next_mstb_lct) { + swap = true; + } else if (mstb_lct == next_mstb_lct) { + if (mstb_lct == 1) { + if (sort_connector[j]->mst_output_port->port_num > sort_connector[j + 1]->mst_output_port->port_num) + swap = true; + } else if (mstb_lct > 1) { + rad = sort_connector[j]->mst_output_port->parent->rad; + next_rad = sort_connector[j + 1]->mst_output_port->parent->rad; + + for (k = 0; k < mstb_lct - 1; k++) { + int shift = (k % 2) ? 0 : 4; + int port_num = (rad[k / 2] >> shift) & 0xf; + int next_port_num = (next_rad[k / 2] >> shift) & 0xf; + + if (port_num > next_port_num) { + swap = true; + break; + } + } + } else { + DRM_ERROR("MST LCT shouldn't be set as < 1"); + mutex_unlock(&ddev->mode_config.mutex); + return; + } + } + + if (swap) + swap(sort_connector[j], sort_connector[j + 1]); + } + } + + idx += mst_con_cnt; + } else { + idx++; + } + } + + /* Complete sorting. 
Assign relevant result to dm->secure_display_ctx.phy_id_mapping[]*/ + memset(dm->secure_display_ctx.phy_id_mapping, 0, sizeof(dm->secure_display_ctx.phy_id_mapping)); + for (idx = 0; idx < connector_cnt; idx++) { + aconnector = sort_connector[idx]; + + dm->secure_display_ctx.phy_id_mapping[idx].assigned = true; + dm->secure_display_ctx.phy_id_mapping[idx].is_mst = false; + dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst = aconnector->dc_link->link_enc_hw_inst; + + if (sort_connector[idx]->mst_root) { + dm->secure_display_ctx.phy_id_mapping[idx].is_mst = true; + dm->secure_display_ctx.phy_id_mapping[idx].lct = aconnector->mst_output_port->parent->lct; + dm->secure_display_ctx.phy_id_mapping[idx].port_num = aconnector->mst_output_port->port_num; + memcpy(dm->secure_display_ctx.phy_id_mapping[idx].rad, + aconnector->mst_output_port->parent->rad, sizeof(aconnector->mst_output_port->parent->rad)); + } + } + mutex_unlock(&ddev->mode_config.mutex); + + dm->secure_display_ctx.phy_id_mapping_cnt = connector_cnt; + dm->secure_display_ctx.phy_mapping_updated = true; +} + +static bool get_phy_id(struct amdgpu_display_manager *dm, + struct amdgpu_dm_connector *aconnector, uint8_t *phy_id) +{ + int idx, idx_2; + bool found = false; + + /* + * Assume secure display starts after all connectors are probed. The connection + * config is static as well + */ + if (!dm->secure_display_ctx.phy_mapping_updated) { + DRM_WARN("%s Should update the phy id table before getting its value", __func__); + return false; + } + + for (idx = 0; idx < dm->secure_display_ctx.phy_id_mapping_cnt; idx++) { + if (!dm->secure_display_ctx.phy_id_mapping[idx].assigned) { + DRM_ERROR("phy_id_mapping[%d] should be assigned", idx); + return false; + } + + if (aconnector->dc_link->link_enc_hw_inst == + dm->secure_display_ctx.phy_id_mapping[idx].enc_hw_inst) { + if (!dm->secure_display_ctx.phy_id_mapping[idx].is_mst) { + found = true; + goto out; + } else { + /* Could be caused by wrongly passing the mst root connector */ + if (!aconnector->mst_output_port) { + DRM_ERROR("%s Check mst case but connector has no port assigned", __func__); + return false; + } + + if (aconnector->mst_root && + aconnector->mst_root->mst_mgr.mst_primary == NULL) { + DRM_WARN("%s passed in a stale mst connector", __func__); + } + + if (aconnector->mst_output_port->parent->lct == dm->secure_display_ctx.phy_id_mapping[idx].lct && + aconnector->mst_output_port->port_num == dm->secure_display_ctx.phy_id_mapping[idx].port_num) { + if (aconnector->mst_output_port->parent->lct == 1) { + found = true; + goto out; + } else if (aconnector->mst_output_port->parent->lct > 1) { + /* Check RAD */ + for (idx_2 = 0; idx_2 < aconnector->mst_output_port->parent->lct - 1; idx_2++) { + int shift = (idx_2 % 2) ?
0 : 4; + int port_num = (aconnector->mst_output_port->parent->rad[idx_2 / 2] >> shift) & 0xf; + int port_num2 = (dm->secure_display_ctx.phy_id_mapping[idx].rad[idx_2 / 2] >> shift) & 0xf; + + if (port_num != port_num2) + break; + } + + if (idx_2 == aconnector->mst_output_port->parent->lct - 1) { + found = true; + goto out; + } + } else { + DRM_ERROR("lCT should be >= 1"); + return false; + } + } + } + } + } + +out: + if (found) { + DRM_DEBUG_DRIVER("Associated secure display PHY ID as %d", idx); + *phy_id = idx; + } else { + DRM_WARN("Can't find associated phy ID"); + return false; + } + + return true; +} + static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc, struct dc_stream_state *stream) { struct drm_device *drm_dev = crtc->dev; struct amdgpu_display_manager *dm = &drm_to_adev(drm_dev)->dm; struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); bool was_activated; + struct amdgpu_dm_connector *aconnector; + uint8_t phy_id; spin_lock_irq(&drm_dev->event_lock); was_activated = acrtc->dm_irq_params.window_param.activated; @@ -104,24 +313,32 @@ static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc, struct dc_st /* Disable secure_display if it was enabled */ if (was_activated) { /* stop ROI update on this crtc */ - flush_work(&dm->secure_display_ctxs[crtc->index].notify_ta_work); - flush_work(&dm->secure_display_ctxs[crtc->index].forward_roi_work); - dc_stream_forward_crc_window(stream, NULL, true); + flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].notify_ta_work); + flush_work(&dm->secure_display_ctx.crtc_ctx[crtc->index].forward_roi_work); + + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + + if (aconnector && get_phy_id(dm, aconnector, &phy_id)) + dc_stream_forward_crc_window(stream, NULL, phy_id, true); + else + DRM_DEBUG_DRIVER("%s Can't find matching phy id", __func__); } } static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) { - struct secure_display_context *secure_display_ctx; + struct secure_display_crtc_context *crtc_ctx; struct psp_context *psp; struct ta_securedisplay_cmd *securedisplay_cmd; struct drm_crtc *crtc; struct dc_stream_state *stream; + struct amdgpu_dm_connector *aconnector; uint8_t phy_inst; + struct amdgpu_display_manager *dm; int ret; - secure_display_ctx = container_of(work, struct secure_display_context, notify_ta_work); - crtc = secure_display_ctx->crtc; + crtc_ctx = container_of(work, struct secure_display_crtc_context, notify_ta_work); + crtc = crtc_ctx->crtc; if (!crtc) return; @@ -133,8 +350,19 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) return; } + dm = &drm_to_adev(crtc->dev)->dm; stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream; - phy_inst = stream->link->link_enc_hw_inst; + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + if (!aconnector) + return; + + mutex_lock(&crtc->dev->mode_config.mutex); + if (!get_phy_id(dm, aconnector, &phy_inst)) { + DRM_WARN("%s Can't find mapping phy id!", __func__); + mutex_unlock(&crtc->dev->mode_config.mutex); + return; + } + mutex_unlock(&crtc->dev->mode_config.mutex); /* need lock for multiple crtcs to use the command buffer */ mutex_lock(&psp->securedisplay_context.mutex); @@ -160,22 +388,37 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work) static void amdgpu_dm_forward_crc_window(struct work_struct *work) { - struct secure_display_context *secure_display_ctx; + struct secure_display_crtc_context *crtc_ctx; struct amdgpu_display_manager *dm; struct drm_crtc *crtc; 
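/*
 * A minimal sketch, not part of the patch itself: the connector ordering in
 * update_phy_id_mapping() and the lookup in get_phy_id() both walk the MST
 * Relative Address (RAD) bytes nibble by nibble, two 4-bit port numbers per
 * byte with the even-indexed hop in the high nibble. A hypothetical helper
 * following the same indexing would be:
 *
 *	static u8 rad_port_at(const u8 *rad, int hop)
 *	{
 *		int shift = (hop % 2) ? 0 : 4;
 *
 *		return (rad[hop / 2] >> shift) & 0xf;
 *	}
 *
 * Two connectors with the same link count total (LCT) then refer to the same
 * downstream port only if rad_port_at() matches for every hop from 0 to
 * lct - 2, which is the comparison loop used in both functions above.
 */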
struct dc_stream_state *stream; + struct amdgpu_dm_connector *aconnector; + uint8_t phy_id; - secure_display_ctx = container_of(work, struct secure_display_context, forward_roi_work); - crtc = secure_display_ctx->crtc; + crtc_ctx = container_of(work, struct secure_display_crtc_context, forward_roi_work); + crtc = crtc_ctx->crtc; if (!crtc) return; dm = &drm_to_adev(crtc->dev)->dm; stream = to_amdgpu_crtc(crtc)->dm_irq_params.stream; + aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context; + + if (!aconnector) + return; + + mutex_lock(&crtc->dev->mode_config.mutex); + if (!get_phy_id(dm, aconnector, &phy_id)) { + DRM_WARN("%s Can't find mapping phy id!", __func__); + mutex_unlock(&crtc->dev->mode_config.mutex); + return; + } + mutex_unlock(&crtc->dev->mode_config.mutex); mutex_lock(&dm->dc_lock); - dc_stream_forward_crc_window(stream, &secure_display_ctx->rect, false); + dc_stream_forward_crc_window(stream, &crtc_ctx->rect, + phy_id, false); mutex_unlock(&dm->dc_lock); } @@ -258,6 +501,10 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) struct drm_crtc_commit *commit; struct dm_crtc_state *crtc_state; struct drm_device *drm_dev = crtc->dev; +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + struct amdgpu_device *adev = drm_to_adev(drm_dev); + struct amdgpu_display_manager *dm = &adev->dm; +#endif struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); struct drm_dp_aux *aux = NULL; bool enable = false; @@ -402,6 +649,12 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) /* Reset crc_skipped on dm state */ crtc_state->crc_skip_count = 0; +#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) + /* Initialize phy id mapping table for secure display*/ + if (!dm->secure_display_ctx.phy_mapping_updated) + update_phy_id_mapping(adev); +#endif + cleanup: if (commit) drm_crtc_commit_put(commit); @@ -472,7 +725,7 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) enum amdgpu_dm_pipe_crc_source cur_crc_src; struct amdgpu_crtc *acrtc = NULL; struct amdgpu_device *adev = NULL; - struct secure_display_context *secure_display_ctx = NULL; + struct secure_display_crtc_context *crtc_ctx = NULL; unsigned long flags1; if (crtc == NULL) @@ -498,23 +751,23 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) goto cleanup; } - secure_display_ctx = &adev->dm.secure_display_ctxs[acrtc->crtc_id]; - if (WARN_ON(secure_display_ctx->crtc != crtc)) { - /* We have set the crtc when creating secure_display_context, + crtc_ctx = &adev->dm.secure_display_ctx.crtc_ctx[acrtc->crtc_id]; + if (WARN_ON(crtc_ctx->crtc != crtc)) { + /* We have set the crtc when creating secure_display_crtc_context, * don't expect it to be changed here. 
*/ - secure_display_ctx->crtc = crtc; + crtc_ctx->crtc = crtc; } if (acrtc->dm_irq_params.window_param.update_win) { /* prepare work for dmub to update ROI */ - secure_display_ctx->rect.x = acrtc->dm_irq_params.window_param.x_start; - secure_display_ctx->rect.y = acrtc->dm_irq_params.window_param.y_start; - secure_display_ctx->rect.width = acrtc->dm_irq_params.window_param.x_end - + crtc_ctx->rect.x = acrtc->dm_irq_params.window_param.x_start; + crtc_ctx->rect.y = acrtc->dm_irq_params.window_param.y_start; + crtc_ctx->rect.width = acrtc->dm_irq_params.window_param.x_end - acrtc->dm_irq_params.window_param.x_start; - secure_display_ctx->rect.height = acrtc->dm_irq_params.window_param.y_end - + crtc_ctx->rect.height = acrtc->dm_irq_params.window_param.y_end - acrtc->dm_irq_params.window_param.y_start; - schedule_work(&secure_display_ctx->forward_roi_work); + schedule_work(&crtc_ctx->forward_roi_work); acrtc->dm_irq_params.window_param.update_win = false; @@ -527,32 +780,34 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc) } else { /* prepare work for psp to read ROI/CRC and send to I2C */ - schedule_work(&secure_display_ctx->notify_ta_work); + schedule_work(&crtc_ctx->notify_ta_work); } cleanup: spin_unlock_irqrestore(&drm_dev->event_lock, flags1); } -struct secure_display_context * -amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev) +void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev) { - struct secure_display_context *secure_display_ctxs = NULL; + struct secure_display_crtc_context *crtc_ctx = NULL; int i; - secure_display_ctxs = kcalloc(adev->mode_info.num_crtc, - sizeof(struct secure_display_context), + crtc_ctx = kcalloc(adev->mode_info.num_crtc, + sizeof(struct secure_display_crtc_context), GFP_KERNEL); - if (!secure_display_ctxs) - return NULL; + if (!crtc_ctx) { + adev->dm.secure_display_ctx.crtc_ctx = NULL; + return; + } for (i = 0; i < adev->mode_info.num_crtc; i++) { - INIT_WORK(&secure_display_ctxs[i].forward_roi_work, amdgpu_dm_forward_crc_window); - INIT_WORK(&secure_display_ctxs[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read); - secure_display_ctxs[i].crtc = &adev->mode_info.crtcs[i]->base; + INIT_WORK(&crtc_ctx[i].forward_roi_work, amdgpu_dm_forward_crc_window); + INIT_WORK(&crtc_ctx[i].notify_ta_work, amdgpu_dm_crtc_notify_ta_to_read); + crtc_ctx[i].crtc = &adev->mode_info.crtcs[i]->base; } - return secure_display_ctxs; + adev->dm.secure_display_ctx.crtc_ctx = crtc_ctx; + return; } #endif diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 748e80ef40d0..4ec2510bc312 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -40,6 +40,17 @@ enum amdgpu_dm_pipe_crc_source { }; #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY +#define MAX_CRTC 6 + +struct phy_id_mapping { + bool assigned; + bool is_mst; + uint8_t enc_hw_inst; + u8 lct; + u8 port_num; + u8 rad[8]; +}; + struct crc_window_param { uint16_t x_start; uint16_t y_start; @@ -53,7 +64,7 @@ struct crc_window_param { int skip_frame_cnt; }; -struct secure_display_context { +struct secure_display_crtc_context { /* work to notify PSP TA*/ struct work_struct notify_ta_work; @@ -65,6 +76,15 @@ struct secure_display_context { /* Region of Interest (ROI) */ struct rect rect; }; + +struct secure_display_context { + + struct secure_display_crtc_context *crtc_ctx; + + bool phy_mapping_updated; + int phy_id_mapping_cnt; + 
struct phy_id_mapping phy_id_mapping[MAX_CRTC]; +}; #endif static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source source) @@ -95,8 +115,7 @@ void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc); #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc); void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc); -struct secure_display_context *amdgpu_dm_crtc_secure_display_create_contexts( - struct amdgpu_device *adev); +void amdgpu_dm_crtc_secure_display_create_contexts(struct amdgpu_device *adev); #else #define amdgpu_dm_crc_window_is_activated(x) #define amdgpu_dm_crtc_handle_crc_window_irq(x) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index 6a97bb2d9160..bc3e7a82b402 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -258,7 +258,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, struct dc_link *link = connector->dc_link; struct amdgpu_device *adev = drm_to_adev(connector->base.dev); struct dc *dc = (struct dc *)link->dc; - struct dc_link_settings prefer_link_settings; + struct dc_link_settings prefer_link_settings = {0}; char *wr_buf = NULL; const uint32_t wr_buf_size = 40; /* 0: lane_count; 1: link_rate */ @@ -389,7 +389,7 @@ static ssize_t dp_mst_link_setting(struct file *f, const char __user *buf, struct dc_link *link = aconnector->dc_link; struct amdgpu_device *adev = drm_to_adev(aconnector->base.dev); struct dc *dc = (struct dc *)link->dc; - struct dc_link_settings prefer_link_settings; + struct dc_link_settings prefer_link_settings = {0}; char *wr_buf = NULL; const uint32_t wr_buf_size = 40; /* 0: lane_count; 1: link_rate */ @@ -613,7 +613,7 @@ static ssize_t dp_phy_settings_write(struct file *f, const char __user *buf, uint32_t wr_buf_size = 40; long param[3]; bool use_prefer_link_setting; - struct link_training_settings link_lane_settings; + struct link_training_settings link_lane_settings = {0}; int max_param_num = 3; uint8_t param_nums = 0; int r = 0; @@ -768,7 +768,7 @@ static ssize_t dp_phy_test_pattern_debugfs_write(struct file *f, const char __us LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED}; struct dc_link_settings cur_link_settings = {LANE_COUNT_UNKNOWN, LINK_RATE_UNKNOWN, LINK_SPREAD_DISABLED}; - struct link_training_settings link_training_settings; + struct link_training_settings link_training_settings = {0}; int i; if (size == 0) @@ -902,9 +902,10 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data) { struct amdgpu_device *adev = m->private; struct dmub_srv_fb_info *fb_info = adev->dm.dmub_fb_info; + struct dmub_fw_meta_info *fw_meta_info = NULL; struct dmub_debugfs_trace_entry *entries; uint8_t *tbuf_base; - uint32_t tbuf_size, max_entries, num_entries, i; + uint32_t tbuf_size, max_entries, num_entries, first_entry, i; if (!fb_info) return 0; @@ -913,20 +914,42 @@ static int dmub_tracebuffer_show(struct seq_file *m, void *data) if (!tbuf_base) return 0; - tbuf_size = fb_info->fb[DMUB_WINDOW_5_TRACEBUFF].size; + if (adev->dm.dmub_srv) + fw_meta_info = &adev->dm.dmub_srv->meta_info; + + tbuf_size = fw_meta_info ? 
fw_meta_info->trace_buffer_size : + DMUB_TRACE_BUFFER_SIZE; max_entries = (tbuf_size - sizeof(struct dmub_debugfs_trace_header)) / sizeof(struct dmub_debugfs_trace_entry); num_entries = ((struct dmub_debugfs_trace_header *)tbuf_base)->entry_count; + /* DMCUB tracebuffer is a ring. If it rolled over, print a hint that + * entries are being overwritten. + */ + if (num_entries > max_entries) + seq_printf(m, "...\n"); + + first_entry = num_entries % max_entries; num_entries = min(num_entries, max_entries); entries = (struct dmub_debugfs_trace_entry *)(tbuf_base + sizeof(struct dmub_debugfs_trace_header)); - for (i = 0; i < num_entries; ++i) { + /* To print entries chronologically, start from the first entry till the + * top of buffer, then from base of buffer to first entry. + */ + for (i = first_entry; i < num_entries; ++i) { + struct dmub_debugfs_trace_entry *entry = &entries[i]; + + seq_printf(m, + "trace_code=%u tick_count=%u param0=%u param1=%u\n", + entry->trace_code, entry->tick_count, entry->param0, + entry->param1); + } + for (i = 0; i < first_entry; ++i) { struct dmub_debugfs_trace_entry *entry = &entries[i]; seq_printf(m, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 6e4359490613..56fa2b9b5d7a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -590,11 +590,12 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr, amdgpu_dm_set_mst_status(&aconnector->mst_status, MST_PROBE, true); - if (drm_connector_init( + if (drm_connector_dynamic_init( dev, connector, &dm_dp_mst_connector_funcs, - DRM_MODE_CONNECTOR_DisplayPort)) { + DRM_MODE_CONNECTOR_DisplayPort, + NULL)) { kfree(aconnector); return NULL; } @@ -1688,16 +1689,16 @@ clean_exit: return ret; } -static unsigned int kbps_from_pbn(unsigned int pbn) +static uint32_t kbps_from_pbn(unsigned int pbn) { - unsigned int kbps = pbn; + uint64_t kbps = (uint64_t)pbn; kbps *= (1000000 / PEAK_FACTOR_X1000); kbps *= 8; kbps *= 54; kbps /= 64; - return kbps; + return (uint32_t)kbps; } static bool is_dsc_common_config_possible(struct dc_stream_state *stream, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c index 495e3cd70426..1ec4e4b9ea94 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c @@ -26,6 +26,7 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> +#include "drm/drm_framebuffer.h" #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_plane_helper.h> #include <drm/drm_gem_framebuffer_helper.h> @@ -309,8 +310,7 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg const struct plane_size *plane_size, union dc_tiling_info *tiling_info, struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) + struct dc_plane_address *address) { const uint64_t modifier = afb->base.modifier; int ret = 0; @@ -318,7 +318,7 @@ static int amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(struct amdg amdgpu_dm_plane_fill_gfx9_tiling_info_from_modifier(adev, tiling_info, modifier); tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); - if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) { + if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { uint64_t dcc_address = afb->address + 
afb->base.offsets[1]; bool independent_64b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); bool independent_128b_blks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier); @@ -360,8 +360,7 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd const struct plane_size *plane_size, union dc_tiling_info *tiling_info, struct dc_plane_dcc_param *dcc, - struct dc_plane_address *address, - const bool force_disable_dcc) + struct dc_plane_address *address) { const uint64_t modifier = afb->base.modifier; int ret = 0; @@ -371,7 +370,7 @@ static int amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(struct amd tiling_info->gfx9.swizzle = amdgpu_dm_plane_modifier_gfx9_swizzle_mode(modifier); - if (amdgpu_dm_plane_modifier_has_dcc(modifier) && !force_disable_dcc) { + if (amdgpu_dm_plane_modifier_has_dcc(modifier)) { int max_compressed_block = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier); dcc->enable = 1; @@ -839,8 +838,7 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc) + bool tmz_surface) { const struct drm_framebuffer *fb = &afb->base; int ret; @@ -900,16 +898,14 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, ret = amdgpu_dm_plane_fill_gfx12_plane_attributes_from_modifiers(adev, afb, format, rotation, plane_size, tiling_info, dcc, - address, - force_disable_dcc); + address); if (ret) return ret; } else if (adev->family >= AMDGPU_FAMILY_AI) { ret = amdgpu_dm_plane_fill_gfx9_plane_attributes_from_modifiers(adev, afb, format, rotation, plane_size, tiling_info, dcc, - address, - force_disable_dcc); + address); if (ret) return ret; } else { @@ -1000,14 +996,13 @@ static int amdgpu_dm_plane_helper_prepare_fb(struct drm_plane *plane, dm_plane_state_old->dc_state != dm_plane_state_new->dc_state) { struct dc_plane_state *plane_state = dm_plane_state_new->dc_state; - bool force_disable_dcc = !plane_state->dcc.enable; amdgpu_dm_plane_fill_plane_buffer_attributes( adev, afb, plane_state->format, plane_state->rotation, afb->tiling_flags, &plane_state->tiling_info, &plane_state->plane_size, &plane_state->dcc, &plane_state->address, - afb->tmz_surface, force_disable_dcc); + afb->tmz_surface); } return 0; @@ -1421,6 +1416,20 @@ static void amdgpu_dm_plane_atomic_async_update(struct drm_plane *plane, amdgpu_dm_plane_handle_cursor_update(plane, old_state); } +static void amdgpu_dm_plane_panic_flush(struct drm_plane *plane) +{ + struct dm_plane_state *dm_plane_state = to_dm_plane_state(plane->state); + struct drm_framebuffer *fb = plane->state->fb; + struct dc_plane_state *dc_plane_state; + + if (!dm_plane_state || !dm_plane_state->dc_state) + return; + + dc_plane_state = dm_plane_state->dc_state; + + dc_plane_force_update_for_panic(dc_plane_state, fb->modifier ? 
true : false); +} + static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { .prepare_fb = amdgpu_dm_plane_helper_prepare_fb, .cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb, @@ -1429,6 +1438,16 @@ static const struct drm_plane_helper_funcs dm_plane_helper_funcs = { .atomic_async_update = amdgpu_dm_plane_atomic_async_update }; +static const struct drm_plane_helper_funcs dm_primary_plane_helper_funcs = { + .prepare_fb = amdgpu_dm_plane_helper_prepare_fb, + .cleanup_fb = amdgpu_dm_plane_helper_cleanup_fb, + .atomic_check = amdgpu_dm_plane_atomic_check, + .atomic_async_check = amdgpu_dm_plane_atomic_async_check, + .atomic_async_update = amdgpu_dm_plane_atomic_async_update, + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = amdgpu_dm_plane_panic_flush, +}; + static void amdgpu_dm_plane_drm_plane_reset(struct drm_plane *plane) { struct dm_plane_state *amdgpu_state = NULL; @@ -1855,7 +1874,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, plane->type != DRM_PLANE_TYPE_CURSOR) drm_plane_enable_fb_damage_clips(plane); - drm_plane_helper_add(plane, &dm_plane_helper_funcs); + if (plane->type == DRM_PLANE_TYPE_PRIMARY) + drm_plane_helper_add(plane, &dm_primary_plane_helper_funcs); + else + drm_plane_helper_add(plane, &dm_plane_helper_funcs); #ifdef AMD_PRIVATE_COLOR dm_atomic_plane_attach_color_mgmt_properties(dm, plane); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h index 6498359bff6f..2eef13b1c05a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.h @@ -51,8 +51,7 @@ int amdgpu_dm_plane_fill_plane_buffer_attributes(struct amdgpu_device *adev, struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address, - bool tmz_surface, - bool force_disable_dcc); + bool tmz_surface); int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm, struct drm_plane *plane, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index ab1132bc896a..d9955c5d2e5e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -174,7 +174,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32) ############################################################################### # DCN35 ############################################################################### -CLK_MGR_DCN35 = dcn35_smu.o dcn35_clk_mgr.o +CLK_MGR_DCN35 = dcn35_smu.o dcn351_clk_mgr.o dcn35_clk_mgr.o AMD_DAL_CLK_MGR_DCN35 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn35/,$(CLK_MGR_DCN35)) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c index 0e243f4344d0..4c3e58c730b1 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c @@ -355,8 +355,11 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p BREAK_TO_DEBUGGER(); return NULL; } + if (ctx->dce_version == DCN_VERSION_3_51) + dcn351_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + else + dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); - dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); return &clk_mgr->base.base; } break; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c index 7920f6f1aa62..76c612ecfe3c 100644 --- 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn201/dcn201_clk_mgr.c @@ -34,8 +34,8 @@ #include "dm_services.h" #include "cyan_skillfish_ip_offset.h" -#include "dcn/dcn_2_0_3_offset.h" -#include "dcn/dcn_2_0_3_sh_mask.h" +#include "dcn/dcn_2_0_1_offset.h" +#include "dcn/dcn_2_0_1_sh_mask.h" #include "clk/clk_11_0_1_offset.h" #include "clk/clk_11_0_1_sh_mask.h" diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c new file mode 100644 index 000000000000..6a6ae618650b --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c @@ -0,0 +1,140 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include "core_types.h" +#include "dcn35_clk_mgr.h" + +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define mmCLK1_CLK_PLL_REQ 0x16E37 + +#define mmCLK1_CLK0_DFS_CNTL 0x16E69 +#define mmCLK1_CLK1_DFS_CNTL 0x16E6C +#define mmCLK1_CLK2_DFS_CNTL 0x16E6F +#define mmCLK1_CLK3_DFS_CNTL 0x16E72 +#define mmCLK1_CLK4_DFS_CNTL 0x16E75 +#define mmCLK1_CLK5_DFS_CNTL 0x16E78 + +#define mmCLK1_CLK0_CURRENT_CNT 0x16EFC +#define mmCLK1_CLK1_CURRENT_CNT 0x16EFD +#define mmCLK1_CLK2_CURRENT_CNT 0x16EFE +#define mmCLK1_CLK3_CURRENT_CNT 0x16EFF +#define mmCLK1_CLK4_CURRENT_CNT 0x16F00 +#define mmCLK1_CLK5_CURRENT_CNT 0x16F01 + +#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A +#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93 +#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C +#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5 +#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE +#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7 + +#define mmCLK1_CLK0_DS_CNTL 0x16E83 +#define mmCLK1_CLK1_DS_CNTL 0x16E8C +#define mmCLK1_CLK2_DS_CNTL 0x16E95 +#define mmCLK1_CLK3_DS_CNTL 0x16E9E +#define mmCLK1_CLK4_DS_CNTL 0x16EA7 +#define mmCLK1_CLK5_DS_CNTL 0x16EB0 + +#define mmCLK1_CLK0_ALLOW_DS 0x16E84 +#define mmCLK1_CLK1_ALLOW_DS 0x16E8D +#define mmCLK1_CLK2_ALLOW_DS 0x16E96 +#define mmCLK1_CLK3_ALLOW_DS 0x16E9F +#define mmCLK1_CLK4_ALLOW_DS 0x16EA8 +#define mmCLK1_CLK5_ALLOW_DS 0x16EB1 + +#define mmCLK5_spll_field_8 0x1B04B +#define mmDENTIST_DISPCLK_CNTL 0x0124 +#define regDENTIST_DISPCLK_CNTL 0x0064 +#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L + +// DENTIST_DISPCLK_CNTL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L + +#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L + +#define REG(reg) \ + (clk_mgr->regs->reg) + +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg + +#define BASE(seg) BASE_INNER(seg) + +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name + +#define CLK_SR_DCN35(reg_name)\ + .reg_name = mm ## reg_name + +static const struct clk_mgr_registers clk_mgr_regs_dcn351 = { + CLK_REG_LIST_DCN35() +}; + +static const struct clk_mgr_shift clk_mgr_shift_dcn351 = { + CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; + +static const struct clk_mgr_mask clk_mgr_mask_dcn351 = { + CLK_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; + +#define TO_CLK_MGR_DCN35(clk_mgr)\ + container_of(clk_mgr, struct clk_mgr_dcn35, base) + + +void dcn351_clk_mgr_construct( + struct dc_context *ctx, + struct clk_mgr_dcn35 *clk_mgr, + struct pp_smu_funcs *pp_smu, + struct dccg *dccg) +{ + /*register offset changed*/ + 
clk_mgr->base.regs = &clk_mgr_regs_dcn351; + clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn351; + clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn351; + + dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + +} + + diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c index b77333817f18..2a74140d7ebf 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c @@ -36,15 +36,11 @@ #include "dcn20/dcn20_clk_mgr.h" - - #include "reg_helper.h" #include "core_types.h" #include "dcn35_smu.h" #include "dm_helpers.h" -/* TODO: remove this include once we ported over remaining clk mgr functions*/ -#include "dcn30/dcn30_clk_mgr.h" #include "dcn31/dcn31_clk_mgr.h" #include "dc_dmub_srv.h" @@ -55,35 +51,102 @@ #define DC_LOGGER \ clk_mgr->base.base.ctx->logger +#define DCN_BASE__INST0_SEG1 0x000000C0 +#define mmCLK1_CLK_PLL_REQ 0x16E37 + +#define mmCLK1_CLK0_DFS_CNTL 0x16E69 +#define mmCLK1_CLK1_DFS_CNTL 0x16E6C +#define mmCLK1_CLK2_DFS_CNTL 0x16E6F +#define mmCLK1_CLK3_DFS_CNTL 0x16E72 +#define mmCLK1_CLK4_DFS_CNTL 0x16E75 +#define mmCLK1_CLK5_DFS_CNTL 0x16E78 + +#define mmCLK1_CLK0_CURRENT_CNT 0x16EFB +#define mmCLK1_CLK1_CURRENT_CNT 0x16EFC +#define mmCLK1_CLK2_CURRENT_CNT 0x16EFD +#define mmCLK1_CLK3_CURRENT_CNT 0x16EFE +#define mmCLK1_CLK4_CURRENT_CNT 0x16EFF +#define mmCLK1_CLK5_CURRENT_CNT 0x16F00 + +#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A +#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93 +#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C +#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5 +#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE +#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7 + +#define mmCLK1_CLK0_DS_CNTL 0x16E83 +#define mmCLK1_CLK1_DS_CNTL 0x16E8C +#define mmCLK1_CLK2_DS_CNTL 0x16E95 +#define mmCLK1_CLK3_DS_CNTL 0x16E9E +#define mmCLK1_CLK4_DS_CNTL 0x16EA7 +#define mmCLK1_CLK5_DS_CNTL 0x16EB0 + +#define mmCLK1_CLK0_ALLOW_DS 0x16E84 +#define mmCLK1_CLK1_ALLOW_DS 0x16E8D +#define mmCLK1_CLK2_ALLOW_DS 0x16E96 +#define mmCLK1_CLK3_ALLOW_DS 0x16E9F +#define mmCLK1_CLK4_ALLOW_DS 0x16EA8 +#define mmCLK1_CLK5_ALLOW_DS 0x16EB1 + +#define mmCLK5_spll_field_8 0x1B04B +#define mmDENTIST_DISPCLK_CNTL 0x0124 +#define regDENTIST_DISPCLK_CNTL 0x0064 +#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L +#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +// DENTIST_DISPCLK_CNTL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14 +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18 +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L +#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L +#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L + +#define 
CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L + +#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 +#undef FN +#define FN(reg_name, field_name) \ + clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name -#define regCLK1_CLK_PLL_REQ 0x0237 -#define regCLK1_CLK_PLL_REQ_BASE_IDX 0 +#define REG(reg) \ + (clk_mgr->regs->reg) -#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L +#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg -#define regCLK1_CLK2_BYPASS_CNTL 0x029c -#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0 +#define BASE(seg) BASE_INNER(seg) -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0 -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10 -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L -#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L +#define SR(reg_name)\ + .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \ + reg ## reg_name -#define regCLK5_0_CLK5_spll_field_8 0x464b -#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0 +#define CLK_SR_DCN35(reg_name)\ + .reg_name = mm ## reg_name -#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd -#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L +static const struct clk_mgr_registers clk_mgr_regs_dcn35 = { + CLK_REG_LIST_DCN35() +}; -#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0 +static const struct clk_mgr_shift clk_mgr_shift_dcn35 = { + CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT) +}; -#define REG(reg_name) \ - (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) +static const struct clk_mgr_mask clk_mgr_mask_dcn35 = { + CLK_COMMON_MASK_SH_LIST_DCN32(_MASK) +}; #define TO_CLK_MGR_DCN35(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn35, base) @@ -452,7 +515,6 @@ static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) struct fixed31_32 pll_req; unsigned int fbmult_frac_val = 0; unsigned int fbmult_int_val = 0; - struct dc_context *ctx = clk_mgr->base.ctx; /* * Register value of fbmult is in 8.16 format, we are converting to 314.32 @@ -512,12 +574,12 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base) { struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); - struct dc_context *ctx = clk_mgr->base.ctx; + uint32_t ssc_enable; - REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable); + ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK; - return ssc_enable == 1; + return ssc_enable != 0; } static void init_clk_states(struct clk_mgr *clk_mgr) @@ -632,6 +694,7 @@ static struct wm_table lpddr5_wm_table = { }; static DpmClocks_t_dcn35 dummy_clocks; +static DpmClocks_t_dcn351 dummy_clocks_dcn351; static struct dcn35_watermarks dummy_wms = { 0 }; @@ -642,10 +705,10 @@ static struct dcn35_ss_info_table ss_info_table = { static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr) { - struct dc_context *ctx = clk_mgr->base.ctx; - uint32_t clock_source; + uint32_t clock_source = 0; + + clock_source = REG_READ(CLK1_CLK2_BYPASS_CNTL) & CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK; - REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source); // If it's DFS mode, clock_source is 0. 
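/*
 * A minimal sketch, not part of the patch itself: with the clock-manager
 * register list now supplied through clk_mgr->regs, single-bit fields such
 * as spll_ssc_en are read by masking the raw register value rather than via
 * REG_GET(). Using the macros defined in this file, the pattern is roughly:
 *
 *	u32 val = REG_READ(CLK5_spll_field_8);
 *	bool ssc_en = (val & CLK5_spll_field_8__spll_ssc_en_MASK) != 0;
 *
 * A multi-bit field read the same way would additionally shift the masked
 * value right by the field's __SHIFT constant before use.
 */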
if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) { clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source]; @@ -755,6 +818,22 @@ static void dcn35_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr); } +static void dcn351_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr, + struct dcn351_smu_dpm_clks *smu_dpm_clks) +{ + DpmClocks_t_dcn351 *table = smu_dpm_clks->dpm_clks; + + if (!clk_mgr->smu_ver) + return; + if (!table || smu_dpm_clks->mc_address.quad_part == 0) + return; + memset(table, 0, sizeof(*table)); + dcn35_smu_set_dram_addr_high(clk_mgr, + smu_dpm_clks->mc_address.high_part); + dcn35_smu_set_dram_addr_low(clk_mgr, + smu_dpm_clks->mc_address.low_part); + dcn35_smu_transfer_dpm_table_smu_2_dram(clk_mgr); +} static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks) { uint32_t max = 0; @@ -1093,6 +1172,57 @@ struct clk_mgr_funcs dcn35_fpga_funcs = { .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz, }; +static void translate_to_DpmClocks_t_dcn35(struct dcn351_smu_dpm_clks *smu_dpm_clks_a, + struct dcn35_smu_dpm_clks *smu_dpm_clks_b) +{ + /*translate two structures and only take need clock tables*/ + uint8_t i; + + if (smu_dpm_clks_a == NULL || smu_dpm_clks_b == NULL || + smu_dpm_clks_a->dpm_clks == NULL || smu_dpm_clks_b->dpm_clks == NULL) + return; + + for (i = 0; i < NUM_DCFCLK_DPM_LEVELS; i++) + smu_dpm_clks_b->dpm_clks->DcfClocks[i] = smu_dpm_clks_a->dpm_clks->DcfClocks[i]; + + for (i = 0; i < NUM_DISPCLK_DPM_LEVELS; i++) + smu_dpm_clks_b->dpm_clks->DispClocks[i] = smu_dpm_clks_a->dpm_clks->DispClocks[i]; + + for (i = 0; i < NUM_DPPCLK_DPM_LEVELS; i++) + smu_dpm_clks_b->dpm_clks->DppClocks[i] = smu_dpm_clks_a->dpm_clks->DppClocks[i]; + + for (i = 0; i < NUM_FCLK_DPM_LEVELS; i++) { + smu_dpm_clks_b->dpm_clks->FclkClocks_Freq[i] = smu_dpm_clks_a->dpm_clks->FclkClocks_Freq[i]; + smu_dpm_clks_b->dpm_clks->FclkClocks_Voltage[i] = smu_dpm_clks_a->dpm_clks->FclkClocks_Voltage[i]; + } + for (i = 0; i < NUM_MEM_PSTATE_LEVELS; i++) { + smu_dpm_clks_b->dpm_clks->MemPstateTable[i].MemClk = + smu_dpm_clks_a->dpm_clks->MemPstateTable[i].MemClk; + smu_dpm_clks_b->dpm_clks->MemPstateTable[i].UClk = + smu_dpm_clks_a->dpm_clks->MemPstateTable[i].UClk; + smu_dpm_clks_b->dpm_clks->MemPstateTable[i].Voltage = + smu_dpm_clks_a->dpm_clks->MemPstateTable[i].Voltage; + smu_dpm_clks_b->dpm_clks->MemPstateTable[i].WckRatio = + smu_dpm_clks_a->dpm_clks->MemPstateTable[i].WckRatio; + } + smu_dpm_clks_b->dpm_clks->MaxGfxClk = smu_dpm_clks_a->dpm_clks->MaxGfxClk; + smu_dpm_clks_b->dpm_clks->MinGfxClk = smu_dpm_clks_a->dpm_clks->MinGfxClk; + smu_dpm_clks_b->dpm_clks->NumDcfClkLevelsEnabled = + smu_dpm_clks_a->dpm_clks->NumDcfClkLevelsEnabled; + smu_dpm_clks_b->dpm_clks->NumDispClkLevelsEnabled = + smu_dpm_clks_a->dpm_clks->NumDispClkLevelsEnabled; + smu_dpm_clks_b->dpm_clks->NumFclkLevelsEnabled = + smu_dpm_clks_a->dpm_clks->NumFclkLevelsEnabled; + smu_dpm_clks_b->dpm_clks->NumMemPstatesEnabled = + smu_dpm_clks_a->dpm_clks->NumMemPstatesEnabled; + smu_dpm_clks_b->dpm_clks->NumSocClkLevelsEnabled = + smu_dpm_clks_a->dpm_clks->NumSocClkLevelsEnabled; + + for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) { + smu_dpm_clks_b->dpm_clks->SocClocks[i] = smu_dpm_clks_a->dpm_clks->SocClocks[i]; + smu_dpm_clks_b->dpm_clks->SocVoltage[i] = smu_dpm_clks_a->dpm_clks->SocVoltage[i]; + } +} void dcn35_clk_mgr_construct( struct dc_context *ctx, struct clk_mgr_dcn35 
*clk_mgr, @@ -1100,6 +1230,7 @@ void dcn35_clk_mgr_construct( struct dccg *dccg) { struct dcn35_smu_dpm_clks smu_dpm_clks = { 0 }; + struct dcn351_smu_dpm_clks smu_dpm_clks_dcn351 = { 0 }; clk_mgr->base.base.ctx = ctx; clk_mgr->base.base.funcs = &dcn35_funcs; @@ -1112,6 +1243,12 @@ void dcn35_clk_mgr_construct( clk_mgr->base.dprefclk_ss_divider = 1000; clk_mgr->base.ss_on_dprefclk = false; clk_mgr->base.dfs_ref_freq_khz = 48000; + if (ctx->dce_version == DCN_VERSION_3_5) { + clk_mgr->base.regs = &clk_mgr_regs_dcn35; + clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn35; + clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn35; + } + clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, @@ -1130,14 +1267,24 @@ void dcn35_clk_mgr_construct( DC_MEM_ALLOC_TYPE_GART, sizeof(DpmClocks_t_dcn35), &smu_dpm_clks.mc_address.quad_part); - if (smu_dpm_clks.dpm_clks == NULL) { smu_dpm_clks.dpm_clks = &dummy_clocks; smu_dpm_clks.mc_address.quad_part = 0; } - ASSERT(smu_dpm_clks.dpm_clks); + if (ctx->dce_version == DCN_VERSION_3_51) { + smu_dpm_clks_dcn351.dpm_clks = (DpmClocks_t_dcn351 *)dm_helpers_allocate_gpu_mem( + clk_mgr->base.base.ctx, + DC_MEM_ALLOC_TYPE_GART, + sizeof(DpmClocks_t_dcn351), + &smu_dpm_clks_dcn351.mc_address.quad_part); + if (smu_dpm_clks_dcn351.dpm_clks == NULL) { + smu_dpm_clks_dcn351.dpm_clks = &dummy_clocks_dcn351; + smu_dpm_clks_dcn351.mc_address.quad_part = 0; + } + } + clk_mgr->base.smu_ver = dcn35_smu_get_smu_version(&clk_mgr->base); if (clk_mgr->base.smu_ver) @@ -1166,7 +1313,11 @@ void dcn35_clk_mgr_construct( if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) { int i; - dcn35_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks); + if (ctx->dce_version == DCN_VERSION_3_51) { + dcn351_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks_dcn351); + translate_to_DpmClocks_t_dcn35(&smu_dpm_clks_dcn351, &smu_dpm_clks); + } else + dcn35_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks); DC_LOG_SMU("NumDcfClkLevelsEnabled: %d\n" "NumDispClkLevelsEnabled: %d\n" "NumSocClkLevelsEnabled: %d\n" @@ -1227,6 +1378,10 @@ void dcn35_clk_mgr_construct( dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART, smu_dpm_clks.dpm_clks); + if (smu_dpm_clks_dcn351.dpm_clks && smu_dpm_clks_dcn351.mc_address.quad_part != 0) + dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_GART, + smu_dpm_clks_dcn351.dpm_clks); + if (ctx->dc->config.disable_ips != DMUB_IPS_DISABLE_ALL) { bool ips_support = false; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h index 1203dc605b12..a12a9bf90806 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h @@ -60,4 +60,8 @@ void dcn35_clk_mgr_construct(struct dc_context *ctx, void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int); +void dcn351_clk_mgr_construct(struct dc_context *ctx, + struct clk_mgr_dcn35 *clk_mgr, + struct pp_smu_funcs *pp_smu, + struct dccg *dccg); #endif //__DCN35_CLK_MGR_H__ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h index 3fae13c73934..ab9d21ba0c43 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_smu.h @@ -126,18 +126,31 @@ typedef struct { uint32_t MaxGfxClk; } DpmClocks_t_dcn35; - -// Throttler Status Bitmask - - - - - 
- - - - - +typedef struct { + uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS]; + uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS]; + uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS]; + uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS]; + uint32_t VClocks0[NUM_VCN_DPM_LEVELS]; + uint32_t VClocks1[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks0[NUM_VCN_DPM_LEVELS]; + uint32_t DClocks1[NUM_VCN_DPM_LEVELS]; + uint32_t VPEClocks[NUM_VPE_DPM_LEVELS]; + uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS]; + uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS]; + uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS]; + MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS]; + uint8_t NumDcfClkLevelsEnabled; + uint8_t NumDispClkLevelsEnabled; // Applies to both Dispclk and Dppclk + uint8_t NumSocClkLevelsEnabled; + uint8_t Vcn0ClkLevelsEnabled; // Applies to both Vclk0 and Dclk0 + uint8_t Vcn1ClkLevelsEnabled; // Applies to both Vclk1 and Dclk1 + uint8_t VpeClkLevelsEnabled; + uint8_t NumMemPstatesEnabled; + uint8_t NumFclkLevelsEnabled; + uint32_t MinGfxClk; + uint32_t MaxGfxClk; +} DpmClocks_t_dcn351; #define TABLE_BIOS_IF 0 // Called by BIOS #define TABLE_WATERMARKS 1 // Called by DAL through VBIOS @@ -163,6 +176,10 @@ struct dcn35_smu_dpm_clks { union large_integer mc_address; }; +struct dcn351_smu_dpm_clks { + DpmClocks_t_dcn351 *dpm_clks; + union large_integer mc_address; +}; /* TODO: taken from vgh, may not be correct */ struct display_idle_optimization { unsigned int df_request_disabled : 1; diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c index 8cfc5f435937..5b4e1e8a9ae2 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.c @@ -141,6 +141,20 @@ static bool dcn401_is_ppclk_idle_dpm_enabled(struct clk_mgr_internal *clk_mgr, P return ppclk_idle_dpm_enabled; } +static bool dcn401_is_df_throttle_opt_enabled(struct clk_mgr_internal *clk_mgr) +{ + bool is_df_throttle_opt_enabled = false; + + if (ASICREV_IS_GC_12_0_1_A0(clk_mgr->base.ctx->asic_id.hw_internal_rev) && + clk_mgr->smu_ver >= 0x663500) { + is_df_throttle_opt_enabled = !clk_mgr->base.ctx->dc->debug.force_subvp_df_throttle; + } + + is_df_throttle_opt_enabled &= clk_mgr->smu_present; + + return is_df_throttle_opt_enabled; +} + /* Query SMU for all clock states for a particular clock */ static void dcn401_init_single_clock(struct clk_mgr_internal *clk_mgr, PPCLK_e clk, unsigned int *entry_0, unsigned int *num_levels) @@ -869,6 +883,12 @@ static void dcn401_execute_block_sequence(struct clk_mgr *clk_mgr_base, unsigned params->update_idle_hardmin_params.uclk_mhz, params->update_idle_hardmin_params.fclk_mhz); break; + case CLK_MGR401_UPDATE_SUBVP_HARDMINS: + dcn401_smu_set_subvp_uclk_fclk_hardmin( + clk_mgr_internal, + params->update_idle_hardmin_params.uclk_mhz, + params->update_idle_hardmin_params.fclk_mhz); + break; case CLK_MGR401_UPDATE_DEEP_SLEEP_DCFCLK: dcn401_smu_set_min_deep_sleep_dcef_clk( clk_mgr_internal, @@ -945,15 +965,21 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( bool update_active_uclk = false; bool update_idle_fclk = false; bool update_idle_uclk = false; + bool update_subvp_prefetch_dramclk = false; + bool update_subvp_prefetch_fclk = false; bool is_idle_dpm_enabled = dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) && dcn401_is_ppclk_dpm_enabled(clk_mgr_internal, PPCLK_FCLK) && dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_UCLK) && 
dcn401_is_ppclk_idle_dpm_enabled(clk_mgr_internal, PPCLK_FCLK); + bool is_df_throttle_opt_enabled = is_idle_dpm_enabled && + dcn401_is_df_throttle_opt_enabled(clk_mgr_internal); int total_plane_count = clk_mgr_helper_get_active_plane_cnt(dc, context); int active_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.dramclk_khz); int active_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.fclk_khz); int idle_uclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_dramclk_khz); int idle_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.idle_fclk_khz); + int subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz); + int subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz); unsigned int num_steps = 0; @@ -1109,6 +1135,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( } } + if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_dramclk_khz, clk_mgr_base->clks.subvp_prefetch_dramclk_khz)) { + clk_mgr_base->clks.subvp_prefetch_dramclk_khz = new_clocks->subvp_prefetch_dramclk_khz; + update_subvp_prefetch_dramclk = true; + subvp_prefetch_dramclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_dramclk_khz); + } + /* FCLK */ /* Always update saved value, even if new value not set due to P-State switching unsupported */ if (should_set_clock(safe_to_lower, new_clocks->fclk_khz, clk_mgr_base->clks.fclk_khz)) { @@ -1129,6 +1161,12 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( } } + if (should_set_clock(safe_to_lower, new_clocks->subvp_prefetch_fclk_khz, clk_mgr_base->clks.subvp_prefetch_fclk_khz)) { + clk_mgr_base->clks.subvp_prefetch_fclk_khz = new_clocks->subvp_prefetch_fclk_khz; + update_subvp_prefetch_fclk = true; + subvp_prefetch_fclk_mhz = khz_to_mhz_ceil(clk_mgr_base->clks.subvp_prefetch_fclk_khz); + } + /* When idle DPM is enabled, need to send active and idle hardmins separately */ /* CLK_MGR401_UPDATE_ACTIVE_HARDMINS */ if ((update_active_uclk || update_active_fclk) && is_idle_dpm_enabled) { @@ -1146,6 +1184,14 @@ static unsigned int dcn401_build_update_bandwidth_clocks_sequence( num_steps++; } + /* CLK_MGR401_UPDATE_SUBVP_HARDMINS */ + if ((update_subvp_prefetch_dramclk || update_subvp_prefetch_fclk) && is_df_throttle_opt_enabled) { + block_sequence[num_steps].params.update_idle_hardmin_params.uclk_mhz = subvp_prefetch_dramclk_mhz; + block_sequence[num_steps].params.update_idle_hardmin_params.fclk_mhz = subvp_prefetch_fclk_mhz; + block_sequence[num_steps].func = CLK_MGR401_UPDATE_SUBVP_HARDMINS; + num_steps++; + } + /* set UCLK to requested value if P-State switching is supported, or to re-enable P-State switching */ if (update_active_uclk || update_idle_uclk) { if (!is_idle_dpm_enabled) { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h index 8b0461992b22..6c9ae5ca2c7e 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr.h @@ -90,6 +90,7 @@ enum dcn401_clk_mgr_block_sequence_func { CLK_MGR401_UPDATE_DTBCLK_DTO, CLK_MGR401_UPDATE_DENTIST, CLK_MGR401_UPDATE_PSR_WAIT_LOOP, + CLK_MGR401_UPDATE_SUBVP_HARDMINS, }; struct dcn401_clk_mgr_block_sequence { diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c index 7700477d019b..b02a41179b41 100644 --- 
a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.c @@ -21,6 +21,11 @@ #define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); } +/* temporary define */ +#ifndef DALSMC_MSG_SubvpUclkFclk +#define DALSMC_MSG_SubvpUclkFclk 0x1B +#endif + /* * Function to be used instead of REG_WAIT macro because the wait ends when * the register is NOT EQUAL to zero, and because the translation in msg_if.h @@ -296,6 +301,24 @@ bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, return success; } +bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, + uint16_t uclk_freq_mhz, + uint16_t fclk_freq_mhz) +{ + uint32_t response = 0; + bool success; + + /* 15:0 for uclk, 31:16 for fclk */ + uint32_t param = (fclk_freq_mhz << 16) | uclk_freq_mhz; + + smu_print("SMU Set subvp hardmin by freq: uclk_freq_mhz = %d MHz, fclk_freq_mhz = %d MHz\n", uclk_freq_mhz, fclk_freq_mhz); + + success = dcn401_smu_send_msg_with_param(clk_mgr, + DALSMC_MSG_SubvpUclkFclk, param, &response); + + return success; +} + void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz) { smu_print("SMU Set min deep sleep dcef clk: freq_mhz = %d MHz\n", freq_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h index 651fb8d62864..42cf7885a7cb 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn401/dcn401_clk_mgr_smu_msg.h @@ -23,6 +23,9 @@ bool dcn401_smu_set_idle_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, bool dcn401_smu_set_active_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, uint16_t uclk_freq_mhz, uint16_t fclk_freq_mhz); +bool dcn401_smu_set_subvp_uclk_fclk_hardmin(struct clk_mgr_internal *clk_mgr, + uint16_t uclk_freq_mhz, + uint16_t fclk_freq_mhz); void dcn401_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz); void dcn401_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 49fe7dcf9372..dfa36368ae63 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -579,7 +579,7 @@ dc_stream_forward_dmcu_crc_window(struct dmcu *dmcu, bool dc_stream_forward_crc_window(struct dc_stream_state *stream, - struct rect *rect, bool is_stop) + struct rect *rect, uint8_t phy_id, bool is_stop) { struct dmcu *dmcu; struct dc_dmub_srv *dmub_srv; @@ -598,7 +598,7 @@ dc_stream_forward_crc_window(struct dc_stream_state *stream, if (i == MAX_PIPES) return false; - mux_mapping.phy_output_num = stream->link->link_enc_hw_inst; + mux_mapping.phy_output_num = phy_id; mux_mapping.otg_output_num = pipe->stream_res.tg->inst; dmcu = dc->res_pool->dmcu; @@ -2153,6 +2153,11 @@ enum dc_status dc_commit_streams(struct dc *dc, struct dc_commit_streams_params struct dc_stream_state *stream = params->streams[i]; struct dc_stream_status *status = dc_stream_get_status(stream); + /* revalidate streams */ + res = dc_validate_stream(dc, stream); + if (res != DC_OK) + return res; + dc_stream_log(dc, stream); set[i].stream = stream; @@ -2982,6 +2987,10 @@ static void copy_surface_update_to_plane( if (srf_update->cursor_csc_color_matrix) surface->cursor_csc_color_matrix = 
*srf_update->cursor_csc_color_matrix; + + if (srf_update->bias_and_scale.bias_and_scale_valid) + surface->bias_and_scale = + srf_update->bias_and_scale; } static void copy_stream_update_to_stream(struct dc *dc, @@ -5307,11 +5316,13 @@ void dc_set_power_state(struct dc *dc, enum dc_acpi_cm_power_state power_state) dc->vm_pa_config.valid) { dc->hwss.init_sys_ctx(dc->hwseq, dc, &dc->vm_pa_config); } - + /*mark d0 last*/ + dc->power_state = power_state; break; default: ASSERT(dc->current_state->stream_count == 0); - + /*mark d3 first*/ + dc->power_state = power_state; dc_dmub_srv_notify_fw_dc_power_state(dc->ctx->dmub_srv, power_state); dc_state_destruct(dc->current_state); @@ -6056,7 +6067,7 @@ void dc_query_current_properties(struct dc *dc, struct dc_current_properties *pr bool subvp_sw_cursor_req = false; for (i = 0; i < dc->current_state->stream_count; i++) { - if (check_subvp_sw_cursor_fallback_req(dc, dc->current_state->streams[i])) { + if (check_subvp_sw_cursor_fallback_req(dc, dc->current_state->streams[i]) && !dc->current_state->streams[i]->hw_cursor_req) { subvp_sw_cursor_req = true; break; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index 55dc482d9b36..701b7e4f2920 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -605,17 +605,6 @@ bool dc_stream_remove_writeback(struct dc *dc, return true; } -bool dc_stream_warmup_writeback(struct dc *dc, - int num_dwb, - struct dc_writeback_info *wb_info) -{ - dc_exit_ips_for_hw_access(dc); - - if (dc->hwss.mmhubbub_warmup) - return dc->hwss.mmhubbub_warmup(dc, num_dwb, wb_info); - else - return false; -} uint32_t dc_stream_get_vblank_counter(const struct dc_stream_state *stream) { uint8_t i; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c index ccbb15f1638c..f3471d45b312 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_surface.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_surface.c @@ -83,13 +83,6 @@ uint8_t dc_plane_get_pipe_mask(struct dc_state *dc_state, const struct dc_plane /******************************************************************************* * Public functions ******************************************************************************/ -void enable_surface_flip_reporting(struct dc_plane_state *plane_state, - uint32_t controller_id) -{ - plane_state->irq_source = controller_id + DC_IRQ_SOURCE_PFLIP1 - 1; - /*register_flip_interrupt(surface);*/ -} - struct dc_plane_state *dc_create_plane_state(const struct dc *dc) { struct dc_plane_state *plane_state = kvzalloc(sizeof(*plane_state), @@ -277,4 +270,50 @@ void dc_3dlut_func_retain(struct dc_3dlut *lut) kref_get(&lut->refcount); } +void dc_plane_force_update_for_panic(struct dc_plane_state *plane_state, + bool clear_tiling) +{ + struct dc *dc; + int i; + + if (!plane_state) + return; + + dc = plane_state->ctx->dc; + if (!dc || !dc->current_state) + return; + + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx) + continue; + + if (dc->ctx->dce_version >= DCE_VERSION_MAX) { + struct hubp *hubp = pipe_ctx->plane_res.hubp; + if (!hubp) + continue; + /* if framebuffer is tiled, disable tiling */ + if (clear_tiling && hubp->funcs->hubp_clear_tiling) + hubp->funcs->hubp_clear_tiling(hubp); + + /* force page flip to see the new content of the framebuffer */ + 
hubp->funcs->hubp_program_surface_flip_and_addr(hubp, + &plane_state->address, + true); + } else { + struct mem_input *mi = pipe_ctx->plane_res.mi; + if (!mi) + continue; + /* if framebuffer is tiled, disable tiling */ + if (clear_tiling && mi->funcs->mem_input_clear_tiling) + mi->funcs->mem_input_clear_tiling(mi); + + /* force page flip to see the new content of the framebuffer */ + mi->funcs->mem_input_program_surface_flip_and_addr(mi, + &plane_state->address, + true); + } + } +} diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index e9b9126c0401..8c6347413038 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -55,7 +55,7 @@ struct aux_payload; struct set_config_cmd_payload; struct dmub_notification; -#define DC_VER "3.2.310" +#define DC_VER "3.2.314" #define MAX_SURFACES 3 #define MAX_PLANES 6 @@ -463,6 +463,7 @@ struct dc_config { bool enable_auto_dpm_test_logs; unsigned int disable_ips; unsigned int disable_ips_in_vpb; + bool disable_ips_in_dpms_off; bool usb4_bw_alloc_support; bool allow_0_dtb_clk; bool use_assr_psp_message; @@ -628,6 +629,8 @@ struct dc_clocks { int bw_dispclk_khz; int idle_dramclk_khz; int idle_fclk_khz; + int subvp_prefetch_dramclk_khz; + int subvp_prefetch_fclk_khz; }; struct dc_bw_validation_profile { @@ -1055,6 +1058,7 @@ struct dc_debug_options { bool dml21_force_pstate_method; uint32_t dml21_force_pstate_method_values[MAX_PIPES]; uint32_t dml21_disable_pstate_method_mask; + union fw_assisted_mclk_switch_version fams_version; union dmub_fams2_global_feature_config fams2_config; bool enable_legacy_clock_update; unsigned int force_cositing; @@ -1070,6 +1074,7 @@ struct dc_debug_options { bool skip_full_updated_if_possible; unsigned int enable_oled_edp_power_up_opt; bool enable_hblank_borrow; + bool force_subvp_df_throttle; }; @@ -1526,6 +1531,7 @@ struct dc_surface_update { const struct dc_cm2_parameters *cm2_params; const struct dc_csc_transform *cursor_csc_color_matrix; unsigned int sdr_white_level_nits; + struct dc_bias_and_scale bias_and_scale; }; /* diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c index f90fc154549a..44ff9abe2880 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c @@ -1245,7 +1245,7 @@ static int count_active_streams(const struct dc *dc) for (i = 0; i < dc->current_state->stream_count; ++i) { struct dc_stream_state *stream = dc->current_state->streams[i]; - if (stream && !stream->dpms_off) + if (stream && (!stream->dpms_off || dc->config.disable_ips_in_dpms_off)) count += 1; } @@ -1694,10 +1694,10 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, { uint8_t num_cmds = 1; uint32_t i; - union dmub_rb_cmd cmd[MAX_STREAMS + 1]; + union dmub_rb_cmd cmd[2 * MAX_STREAMS + 1]; struct dmub_rb_cmd_fams2 *global_cmd = &cmd[0].fams2_config; - memset(cmd, 0, sizeof(union dmub_rb_cmd) * (MAX_STREAMS + 1)); + memset(cmd, 0, sizeof(union dmub_rb_cmd) * (2 * MAX_STREAMS + 1)); /* fill in generic command header */ global_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; global_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; @@ -1714,17 +1714,26 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, /* construct per-stream configs */ for (i = 0; i < context->bw_ctx.bw.dcn.fams2_global_config.num_streams; i++) { - struct dmub_rb_cmd_fams2 *stream_cmd = &cmd[i+1].fams2_config; + struct dmub_rb_cmd_fams2 *stream_base_cmd = &cmd[i+1].fams2_config; + struct 
dmub_rb_cmd_fams2 *stream_sub_state_cmd = &cmd[i+1+context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config; /* configure command header */ - stream_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; - stream_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; - stream_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); - stream_cmd->header.multi_cmd_pending = 1; - /* copy stream static state */ - memcpy(&stream_cmd->config.stream, - &context->bw_ctx.bw.dcn.fams2_stream_params[i], - sizeof(struct dmub_fams2_stream_static_state)); + stream_base_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; + stream_base_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; + stream_base_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_base_cmd->header.multi_cmd_pending = 1; + stream_sub_state_cmd->header.type = DMUB_CMD__FW_ASSISTED_MCLK_SWITCH; + stream_sub_state_cmd->header.sub_type = DMUB_CMD__FAMS2_CONFIG; + stream_sub_state_cmd->header.payload_bytes = sizeof(struct dmub_rb_cmd_fams2) - sizeof(struct dmub_cmd_header); + stream_sub_state_cmd->header.multi_cmd_pending = 1; + /* copy stream static base state */ + memcpy(&stream_base_cmd->config, + &context->bw_ctx.bw.dcn.fams2_stream_base_params[i], + sizeof(union dmub_cmd_fams2_config)); + /* copy stream static sub state */ + memcpy(&stream_sub_state_cmd->config, + &context->bw_ctx.bw.dcn.fams2_stream_sub_params[i], + sizeof(union dmub_cmd_fams2_config)); } } @@ -1735,8 +1744,8 @@ void dc_dmub_srv_fams2_update_config(struct dc *dc, if (enable && context->bw_ctx.bw.dcn.fams2_global_config.features.bits.enable) { /* set multi pending for global, and unset for last stream cmd */ global_cmd->header.multi_cmd_pending = 1; - cmd[context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0; - num_cmds += context->bw_ctx.bw.dcn.fams2_global_config.num_streams; + cmd[2 * context->bw_ctx.bw.dcn.fams2_global_config.num_streams].fams2_config.header.multi_cmd_pending = 0; + num_cmds += 2 * context->bw_ctx.bw.dcn.fams2_global_config.num_streams; } dm_execute_dmub_cmd_list(dc->ctx, num_cmds, cmd, DM_DMUB_WAIT_TYPE_WAIT); diff --git a/drivers/gpu/drm/amd/display/dc/dc_plane.h b/drivers/gpu/drm/amd/display/dc/dc_plane.h index bd37ec82b42d..fabcefeda288 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_plane.h +++ b/drivers/gpu/drm/amd/display/dc/dc_plane.h @@ -34,4 +34,7 @@ const struct dc_plane_status *dc_plane_get_status( void dc_plane_state_retain(struct dc_plane_state *plane_state); void dc_plane_state_release(struct dc_plane_state *plane_state); +void dc_plane_force_update_for_panic(struct dc_plane_state *plane_state, + bool clear_tiling); + #endif /* _DC_PLANE_H_ */ diff --git a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c index 0e310fd48b5c..3518eb1b8cd1 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c +++ b/drivers/gpu/drm/amd/display/dc/dc_spl_translate.c @@ -64,6 +64,13 @@ static void populate_inits_from_splinits(struct scl_inits *inits, inits->h_c = dc_fixpt_from_int_dy(spl_inits->h_filter_init_int_c, spl_inits->h_filter_init_frac_c >> 5, 0, 19); inits->v_c = dc_fixpt_from_int_dy(spl_inits->v_filter_init_int_c, spl_inits->v_filter_init_frac_c >> 5, 0, 19); } +static void populate_splformat_from_format(enum spl_pixel_format *spl_pixel_format, const enum pixel_format pixel_format) +{ + if (pixel_format < PIXEL_FORMAT_INVALID) + *spl_pixel_format = (enum 
spl_pixel_format)pixel_format; + else + *spl_pixel_format = SPL_PIXEL_FORMAT_INVALID; +} /// @brief Translate SPL input parameters from pipe context /// @param pipe_ctx /// @param spl_in @@ -89,7 +96,7 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->callbacks = dcn2_spl_callbacks; } // Make format field from spl_in point to plane_res scl_data format - spl_in->basic_in.format = (enum spl_pixel_format)pipe_ctx->plane_res.scl_data.format; + populate_splformat_from_format(&spl_in->basic_in.format, pipe_ctx->plane_res.scl_data.format); // Make view_format from basic_out point to view_format from stream spl_in->basic_out.view_format = (enum spl_view_3d)stream->view_format; // Populate spl input basic input clip rect from plane state clip rect @@ -108,12 +115,14 @@ void translate_SPL_in_params_from_pipe_ctx(struct pipe_ctx *pipe_ctx, struct spl spl_in->basic_in.horizontal_mirror = plane_state->horizontal_mirror; // Calculate horizontal splits and split index - spl_in->basic_in.mpc_combine_h = resource_get_mpc_slice_count(pipe_ctx); + spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned = false; + spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices = + resource_get_mpc_slice_count(pipe_ctx); if (stream->view_format == VIEW_3D_FORMAT_SIDE_BY_SIDE) - spl_in->basic_in.mpc_combine_v = 0; + spl_in->basic_in.mpc_h_slice_index = 0; else - spl_in->basic_in.mpc_combine_v = resource_get_mpc_slice_index(pipe_ctx); + spl_in->basic_in.mpc_h_slice_index = resource_get_mpc_slice_index(pipe_ctx); populate_splrect_from_rect(&spl_in->basic_out.odm_slice_rect, &odm_slice_src); spl_in->basic_out.odm_combine_factor = 0; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index 413970588a26..713884103aea 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -447,10 +447,6 @@ enum dc_status dc_stream_add_dsc_to_resource(struct dc *dc, struct dc_state *state, struct dc_stream_state *stream); -bool dc_stream_warmup_writeback(struct dc *dc, - int num_dwb, - struct dc_writeback_info *wb_info); - bool dc_stream_dmdata_status_done(struct dc *dc, struct dc_stream_state *stream); bool dc_stream_set_dynamic_metadata(struct dc *dc, @@ -541,6 +537,7 @@ bool dc_stream_get_crtc_position(struct dc *dc, #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY) bool dc_stream_forward_crc_window(struct dc_stream_state *stream, struct rect *rect, + uint8_t phy_id, bool is_stop); #endif diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index b700608e4240..077337698e0a 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -1105,6 +1105,9 @@ static bool dcn401_program_pix_clk( &dto_params); } else { + if (pll_settings->actual_pix_clk_100hz > 6000000UL) + return false; + /* disables DP DTO when provided with TMDS signal type */ clock_source->ctx->dc->res_pool->dccg->funcs->set_dp_dto( clock_source->ctx->dc->res_pool->dccg, diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c index f5e1d9caee4c..ebd174be5786 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_mem_input.c @@ -481,7 +481,6 @@ static void program_tiling( } } - static void program_size_and_rotation( struct dce_mem_input 
*dce_mi, enum dc_rotation_angle rotation, @@ -627,6 +626,27 @@ static void program_grph_pixel_format( GRPH_PRESCALE_B_SIGN, sign); } +static void dce_mi_clear_tiling( + struct mem_input *mi) +{ + struct dce_mem_input *dce_mi = TO_DCE_MEM_INPUT(mi); + + if (dce_mi->masks->GRPH_SW_MODE) { /* GFX9 */ + REG_UPDATE(GRPH_CONTROL, + GRPH_SW_MODE, DC_SW_LINEAR); + } + + if (dce_mi->masks->GRPH_MICRO_TILE_MODE) { /* GFX8 */ + REG_UPDATE(GRPH_CONTROL, + GRPH_ARRAY_MODE, DC_SW_LINEAR); + } + + if (dce_mi->masks->GRPH_ARRAY_MODE) { /* GFX6 but reuses gfx8 struct */ + REG_UPDATE(GRPH_CONTROL, + GRPH_ARRAY_MODE, DC_SW_LINEAR); + } +} + static void dce_mi_program_surface_config( struct mem_input *mi, enum surface_pixel_format format, @@ -884,7 +904,8 @@ static const struct mem_input_funcs dce_mi_funcs = { .mem_input_program_pte_vm = dce_mi_program_pte_vm, .mem_input_program_surface_config = dce_mi_program_surface_config, - .mem_input_is_flip_pending = dce_mi_is_flip_pending + .mem_input_is_flip_pending = dce_mi_is_flip_pending, + .mem_input_clear_tiling = dce_mi_clear_tiling, }; #if defined(CONFIG_DRM_AMD_DC_SI) @@ -897,7 +918,8 @@ static const struct mem_input_funcs dce60_mi_funcs = { .mem_input_program_pte_vm = dce_mi_program_pte_vm, .mem_input_program_surface_config = dce60_mi_program_surface_config, - .mem_input_is_flip_pending = dce_mi_is_flip_pending + .mem_input_is_flip_pending = dce_mi_is_flip_pending, + .mem_input_clear_tiling = dce_mi_clear_tiling, }; #endif @@ -910,7 +932,8 @@ static const struct mem_input_funcs dce112_mi_funcs = { .mem_input_program_pte_vm = dce_mi_program_pte_vm, .mem_input_program_surface_config = dce_mi_program_surface_config, - .mem_input_is_flip_pending = dce_mi_is_flip_pending + .mem_input_is_flip_pending = dce_mi_is_flip_pending, + .mem_input_clear_tiling = dce_mi_clear_tiling, }; static const struct mem_input_funcs dce120_mi_funcs = { @@ -922,7 +945,8 @@ static const struct mem_input_funcs dce120_mi_funcs = { .mem_input_program_pte_vm = dce_mi_program_pte_vm, .mem_input_program_surface_config = dce_mi_program_surface_config, - .mem_input_is_flip_pending = dce_mi_is_flip_pending + .mem_input_is_flip_pending = dce_mi_is_flip_pending, + .mem_input_clear_tiling = dce_mi_clear_tiling, }; void dce_mem_input_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c index cae18f8c1c9a..88c75c243bf8 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c @@ -390,8 +390,7 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub, !memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1, sizeof(DP_SINK_DEVICE_STR_ID_1))) link->psr_settings.force_ffu_mode = 1; - else - link->psr_settings.force_ffu_mode = 0; + copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode; if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE && diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c index 573898984726..f9961a6446f3 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c @@ -168,31 +168,33 @@ void dcn31_panel_cntl_construct( struct dcn31_panel_cntl *dcn31_panel_cntl, const struct panel_cntl_init_data *init_data) { - uint8_t pwrseq_inst = 0xF; dcn31_panel_cntl->base.funcs = &dcn31_link_panel_cntl_funcs; dcn31_panel_cntl->base.ctx = init_data->ctx; dcn31_panel_cntl->base.inst = init_data->inst; - switch 
(init_data->eng_id) { - case ENGINE_ID_DIGA: - pwrseq_inst = 0; - break; - case ENGINE_ID_DIGB: - pwrseq_inst = 1; - break; - default: - DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id); - ASSERT(false); - break; - } - - if (dcn31_panel_cntl->base.ctx->dc->config.support_edp0_on_dp1) + if (dcn31_panel_cntl->base.ctx->dc->config.support_edp0_on_dp1) { //If supported, power sequencer mapping shall follow the DIG instance + uint8_t pwrseq_inst = 0xF; + + switch (init_data->eng_id) { + case ENGINE_ID_DIGA: + pwrseq_inst = 0; + break; + case ENGINE_ID_DIGB: + pwrseq_inst = 1; + break; + default: + DC_LOG_WARNING("Unsupported pwrseq engine id: %d!\n", init_data->eng_id); + ASSERT(false); + break; + } + dcn31_panel_cntl->base.pwrseq_inst = pwrseq_inst; - else + } else { /* If not supported, pwrseq will be assigned in order, * so first pwrseq will be assigned to first panel instance (legacy behavior) */ dcn31_panel_cntl->base.pwrseq_inst = dcn31_panel_cntl->base.inst; + } } diff --git a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c index 39525721c976..f1235bf9a596 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml/calcs/dcn_calcs.c @@ -1312,138 +1312,6 @@ bool dcn_validate_bandwidth( return false; } -static unsigned int dcn_find_normalized_clock_vdd_Level( - const struct dc *dc, - enum dm_pp_clock_type clocks_type, - int clocks_in_khz) -{ - int vdd_level = dcn_bw_v_min0p65; - - if (clocks_in_khz == 0)/*todo some clock not in the considerations*/ - return vdd_level; - - switch (clocks_type) { - case DM_PP_CLOCK_TYPE_DISPLAY_CLK: - if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmax0p9*1000) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vnom0p8*1000) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmid0p72*1000) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->max_dispclk_vmin0p65*1000) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - break; - case DM_PP_CLOCK_TYPE_DISPLAYPHYCLK: - if (clocks_in_khz > dc->dcn_soc->phyclkv_max0p9*1000) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->phyclkv_nom0p8*1000) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->phyclkv_mid0p72*1000) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->phyclkv_min0p65*1000) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - break; - - case DM_PP_CLOCK_TYPE_DPPCLK: - if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmax0p9*1000) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vnom0p8*1000) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmid0p72*1000) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->max_dppclk_vmin0p65*1000) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - break; - - case DM_PP_CLOCK_TYPE_MEMORY_CLK: - { - unsigned factor = (ddr4_dram_factor_single_Channel * dc->dcn_soc->number_of_channels); - - if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9*1000000/factor) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8*1000000/factor) { - vdd_level = 
dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72*1000000/factor) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65*1000000/factor) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - } - break; - - case DM_PP_CLOCK_TYPE_DCFCLK: - if (clocks_in_khz > dc->dcn_soc->dcfclkv_max0p9*1000) { - vdd_level = dcn_bw_v_max0p91; - BREAK_TO_DEBUGGER(); - } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_nom0p8*1000) { - vdd_level = dcn_bw_v_max0p9; - } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_mid0p72*1000) { - vdd_level = dcn_bw_v_nom0p8; - } else if (clocks_in_khz > dc->dcn_soc->dcfclkv_min0p65*1000) { - vdd_level = dcn_bw_v_mid0p72; - } else - vdd_level = dcn_bw_v_min0p65; - break; - - default: - break; - } - return vdd_level; -} - -unsigned int dcn_find_dcfclk_suits_all( - const struct dc *dc, - struct dc_clocks *clocks) -{ - unsigned vdd_level, vdd_level_temp; - unsigned dcf_clk; - - /*find a common supported voltage level*/ - vdd_level = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_DISPLAY_CLK, clocks->dispclk_khz); - vdd_level_temp = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_DISPLAYPHYCLK, clocks->phyclk_khz); - - vdd_level = dcn_bw_max(vdd_level, vdd_level_temp); - vdd_level_temp = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_DPPCLK, clocks->dppclk_khz); - vdd_level = dcn_bw_max(vdd_level, vdd_level_temp); - - vdd_level_temp = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_MEMORY_CLK, clocks->fclk_khz); - vdd_level = dcn_bw_max(vdd_level, vdd_level_temp); - vdd_level_temp = dcn_find_normalized_clock_vdd_Level( - dc, DM_PP_CLOCK_TYPE_DCFCLK, clocks->dcfclk_khz); - - /*find that level conresponding dcfclk*/ - vdd_level = dcn_bw_max(vdd_level, vdd_level_temp); - if (vdd_level == dcn_bw_v_max0p91) { - BREAK_TO_DEBUGGER(); - dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000; - } else if (vdd_level == dcn_bw_v_max0p9) - dcf_clk = dc->dcn_soc->dcfclkv_max0p9*1000; - else if (vdd_level == dcn_bw_v_nom0p8) - dcf_clk = dc->dcn_soc->dcfclkv_nom0p8*1000; - else if (vdd_level == dcn_bw_v_mid0p72) - dcf_clk = dc->dcn_soc->dcfclkv_mid0p72*1000; - else - dcf_clk = dc->dcn_soc->dcfclkv_min0p65*1000; - - DC_LOG_BANDWIDTH_CALCS("\tdcf_clk for voltage = %d\n", dcf_clk); - return dcf_clk; -} - void dcn_bw_update_from_pplib_fclks( struct dc *dc, struct dm_pp_clock_levels_with_voltage *fclks) diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index 76d3bb3c9155..8d4873f80df0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -1562,6 +1562,7 @@ static void dml_rq_dlg_get_dlg_params(struct display_mode_lib *mode_lib, dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_vblank = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_vblank); dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); + disp_dlg_regs->refcyc_per_pte_group_vblank_l = (unsigned int)(dst_y_per_row_vblank * (double)htotal * ref_freq_to_pix_freq / (double)dpte_groups_per_row_ub_l); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/Makefile b/drivers/gpu/drm/amd/display/dc/dml2/Makefile index 
c4378e620cbf..d9c27ebe12ee 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml2/Makefile @@ -73,9 +73,8 @@ AMD_DAL_DML2 = $(addprefix $(AMDDALPATH)/dc/dml2/,$(DML2)) AMD_DISPLAY_FILES += $(AMD_DAL_DML2) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := $(dml2_ccflags) -CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_ccflags) +CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_ccflags) $(frame_warn_flag) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_ccflags) @@ -94,9 +93,8 @@ CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_ccflags) CFLAGS_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_ccflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml_top_mcache.o := $(dml2_rcflags) -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.o := $(dml2_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/src/dml2_core/dml2_core_factory.o := $(dml2_rcflags) @@ -113,9 +111,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_translation_helper.o := $(dml2_r CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/dml21_utils.o := $(dml2_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml2/dml21/inc/dml2_debug.o := $(dml2_rcflags) -DML21 := src/dml2_top/dml_top.o -DML21 += src/dml2_top/dml_top_mcache.o -DML21 += src/dml2_top/dml2_top_optimization.o +DML21 := src/dml2_top/dml2_top_interfaces.o +DML21 += src/dml2_top/dml2_top_soc15.o DML21 += src/inc/dml2_debug.o DML21 += src/dml2_core/dml2_core_dcn4.o DML21 += src/dml2_core/dml2_core_factory.o diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c index 8dabb1ac0b68..35bc917631ae 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c @@ -6301,9 +6301,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.meta_row_bandwidth_this_state, mode_lib->ms.dpte_row_bandwidth_this_state, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor); + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j]); s->VMDataOnlyReturnBWPerState = dml_get_return_bw_mbps_vm_only( &mode_lib->ms.soc, @@ -6434,7 +6434,7 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) /* Output */ &mode_lib->ms.UrgentBurstFactorCursorPre[k], &mode_lib->ms.UrgentBurstFactorLumaPre[k], - 
&mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.UrgentBurstFactorChromaPre[k], &mode_lib->ms.NotUrgentLatencyHidingPre[k]); mode_lib->ms.cursor_bw_pre[k] = mode_lib->ms.cache_display_cfg.plane.NumberOfCursors[k] * mode_lib->ms.cache_display_cfg.plane.CursorWidth[k] * @@ -6458,9 +6458,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw_pre, mode_lib->ms.prefetch_vmrow_bw, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre, @@ -6517,9 +6517,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw, mode_lib->ms.cursor_bw_pre, mode_lib->ms.NoOfDPPThisState, - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre); @@ -6586,9 +6586,9 @@ static void dml_prefetch_check(struct display_mode_lib_st *mode_lib) mode_lib->ms.cursor_bw_pre, mode_lib->ms.prefetch_vmrow_bw, mode_lib->ms.NoOfDPP[j], // VBA_ERROR DPPPerSurface is not assigned at this point, should use NoOfDpp here - mode_lib->ms.UrgentBurstFactorLuma, - mode_lib->ms.UrgentBurstFactorChroma, - mode_lib->ms.UrgentBurstFactorCursor, + mode_lib->ms.UrgentBurstFactorLuma[j], + mode_lib->ms.UrgentBurstFactorChroma[j], + mode_lib->ms.UrgentBurstFactorCursor[j], mode_lib->ms.UrgentBurstFactorLumaPre, mode_lib->ms.UrgentBurstFactorChromaPre, mode_lib->ms.UrgentBurstFactorCursorPre, @@ -7809,9 +7809,9 @@ dml_bool_t dml_core_mode_support(struct display_mode_lib_st *mode_lib) mode_lib->ms.DETBufferSizeYThisState[k], mode_lib->ms.DETBufferSizeCThisState[k], /* Output */ - &mode_lib->ms.UrgentBurstFactorCursor[k], - &mode_lib->ms.UrgentBurstFactorLuma[k], - &mode_lib->ms.UrgentBurstFactorChroma[k], + &mode_lib->ms.UrgentBurstFactorCursor[j][k], + &mode_lib->ms.UrgentBurstFactorLuma[j][k], + &mode_lib->ms.UrgentBurstFactorChroma[j][k], &mode_lib->ms.NotUrgentLatencyHiding[k]); } @@ -8318,7 +8318,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc if (clk_cfg->dcfclk_option != dml_use_override_freq) locals->Dcfclk = mode_lib->ms.DCFCLK; else - locals->Dcfclk = clk_cfg->dcfclk_freq_mhz; + locals->Dcfclk = clk_cfg->dcfclk_mhz; #ifdef __DML_VBA_DEBUG__ dml_print_dml_policy(&mode_lib->ms.policy); @@ -8371,7 +8371,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc if (clk_cfg->dispclk_option == dml_use_required_freq) locals->Dispclk = locals->Dispclk_calculated; else if (clk_cfg->dispclk_option == dml_use_override_freq) - locals->Dispclk = clk_cfg->dispclk_freq_mhz; + locals->Dispclk = clk_cfg->dispclk_mhz; else locals->Dispclk = mode_lib->ms.state.dispclk_mhz; #ifdef __DML_VBA_DEBUG__ @@ -8412,7 +8412,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc if (clk_cfg->dppclk_option[k] == dml_use_required_freq) locals->Dppclk[k] = locals->Dppclk_calculated[k]; else if (clk_cfg->dppclk_option[k] == dml_use_override_freq) - 
locals->Dppclk[k] = clk_cfg->dppclk_freq_mhz[k]; + locals->Dppclk[k] = clk_cfg->dppclk_mhz[k]; else locals->Dppclk[k] = mode_lib->ms.state.dppclk_mhz; #ifdef __DML_VBA_DEBUG__ @@ -9190,6 +9190,8 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc &locals->FractionOfUrgentBandwidth, &s->dummy_boolean[0]); // dml_bool_t *PrefetchBandwidthSupport + + if (s->VRatioPrefetchMoreThanMax != false || s->DestinationLineTimesForPrefetchLessThan2 != false) { dml_print("DML::%s: VRatioPrefetchMoreThanMax = %u\n", __func__, s->VRatioPrefetchMoreThanMax); dml_print("DML::%s: DestinationLineTimesForPrefetchLessThan2 = %u\n", __func__, s->DestinationLineTimesForPrefetchLessThan2); @@ -9204,6 +9206,7 @@ void dml_core_mode_programming(struct display_mode_lib_st *mode_lib, const struc } } + if (locals->PrefetchModeSupported == true && mode_lib->ms.support.ImmediateFlipSupport == true) { locals->BandwidthAvailableForImmediateFlip = CalculateBandwidthAvailableForImmediateFlip( mode_lib->ms.num_active_planes, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h index f951936bb579..dd3f43181a6e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core_structs.h @@ -28,6 +28,7 @@ #define __DISPLAY_MODE_CORE_STRUCT_H__ #include "display_mode_lib_defines.h" +#include "dml_top_display_cfg_types.h" enum dml_project_id { dml_project_invalid = 0, @@ -49,7 +50,9 @@ enum dml_use_mall_for_pstate_change_mode { dml_use_mall_pstate_change_disable = 0, dml_use_mall_pstate_change_full_frame = 1, dml_use_mall_pstate_change_sub_viewport = 2, - dml_use_mall_pstate_change_phantom_pipe = 3 + dml_use_mall_pstate_change_phantom_pipe = 3, + dml_use_mall_pstate_change_phantom_pipe_no_data_return = 4, + dml_use_mall_pstate_change_imall = 5 }; enum dml_use_mall_for_static_screen_mode { dml_use_mall_static_screen_disable = 0, @@ -171,7 +174,11 @@ enum dml_swizzle_mode { dml_sw_256kb_z_x = 28, dml_sw_256kb_s_x = 29, dml_sw_256kb_d_x = 30, - dml_sw_256kb_r_x = 31 + dml_sw_256kb_r_x = 31, + dml_sw_256b_2d = 32, + dml_sw_4kb_2d = 33, + dml_sw_64kb_2d = 34, + dml_sw_256kb_2d = 35 }; enum dml_lb_depth { dml_lb_6 = 0, @@ -223,24 +230,28 @@ enum dml_mpc_use_policy { dml_mpc_disabled = 0, dml_mpc_as_possible = 1, dml_mpc_as_needed_for_voltage = 2, - dml_mpc_as_needed_for_pstate_and_voltage = 3 + dml_mpc_as_needed_for_pstate_and_voltage = 3, + dml_mpc_as_needed = 4, + dml_mpc_2to1 = 5 }; enum dml_odm_use_policy { dml_odm_use_policy_bypass = 0, dml_odm_use_policy_combine_as_needed = 1, dml_odm_use_policy_combine_2to1 = 2, - dml_odm_use_policy_combine_4to1 = 3, - dml_odm_use_policy_split_1to2 = 4, - dml_odm_use_policy_mso_1to2 = 5, - dml_odm_use_policy_mso_1to4 = 6 + dml_odm_use_policy_combine_3to1 = 3, + dml_odm_use_policy_combine_4to1 = 4, + dml_odm_use_policy_split_1to2 = 5, + dml_odm_use_policy_mso_1to2 = 6, + dml_odm_use_policy_mso_1to4 = 7 }; enum dml_odm_mode { dml_odm_mode_bypass = 0, dml_odm_mode_combine_2to1 = 1, - dml_odm_mode_combine_4to1 = 2, - dml_odm_mode_split_1to2 = 3, - dml_odm_mode_mso_1to2 = 4, - dml_odm_mode_mso_1to4 = 5 + dml_odm_mode_combine_3to1 = 2, + dml_odm_mode_combine_4to1 = 3, + dml_odm_mode_split_1to2 = 4, + dml_odm_mode_mso_1to2 = 5, + dml_odm_mode_mso_1to4 = 6 }; enum dml_writeback_configuration { dml_whole_buffer_for_single_stream_no_interleave = 0, @@ -289,6 +300,17 @@ struct soc_state_bounding_box_st { dml_float_t 
fclk_change_latency_us; dml_float_t usr_retraining_latency_us; dml_bool_t use_ideal_dram_bw_strobe; + dml_float_t g6_temp_read_blackout_us; + + struct { + dml_uint_t urgent_ramp_uclk_cycles; + dml_uint_t trip_to_memory_uclk_cycles; + dml_uint_t meta_trip_to_memory_uclk_cycles; + dml_uint_t maximum_latency_when_urgent_uclk_cycles; + dml_uint_t average_latency_when_urgent_uclk_cycles; + dml_uint_t maximum_latency_when_non_urgent_uclk_cycles; + dml_uint_t average_latency_when_non_urgent_uclk_cycles; + } dml_dcn401_uclk_dpm_dependent_soc_qos_params; }; struct soc_bounding_box_st { @@ -297,7 +319,7 @@ struct soc_bounding_box_st { dml_float_t pcierefclk_mhz; dml_float_t refclk_mhz; dml_float_t amclk_mhz; - dml_float_t max_outstanding_reqs; + dml_uint_t max_outstanding_reqs; dml_float_t pct_ideal_sdp_bw_after_urgent; dml_float_t pct_ideal_fabric_bw_after_urgent; dml_float_t pct_ideal_dram_bw_after_urgent_pixel_only; @@ -308,6 +330,16 @@ struct soc_bounding_box_st { dml_float_t max_avg_fabric_bw_use_normal_percent; dml_float_t max_avg_dram_bw_use_normal_percent; dml_float_t max_avg_dram_bw_use_normal_strobe_percent; + + dml_float_t svp_prefetch_pct_ideal_sdp_bw_after_urgent; + dml_float_t svp_prefetch_pct_ideal_fabric_bw_after_urgent; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_only; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_pixel_and_vm; + dml_float_t svp_prefetch_pct_ideal_dram_bw_after_urgent_vm_only; + dml_float_t svp_prefetch_max_avg_sdp_bw_use_normal_percent; + dml_float_t svp_prefetch_max_avg_fabric_bw_use_normal_percent; + dml_float_t svp_prefetch_max_avg_dram_bw_use_normal_percent; + dml_uint_t round_trip_ping_latency_dcfclk_cycles; dml_uint_t urgent_out_of_order_return_per_channel_pixel_only_bytes; dml_uint_t urgent_out_of_order_return_per_channel_pixel_and_vm_bytes; @@ -324,6 +356,26 @@ struct soc_bounding_box_st { dml_uint_t mall_allocated_for_dcn_mbytes; dml_float_t dispclk_dppclk_vco_speed_mhz; dml_bool_t do_urgent_latency_adjustment; + + dml_uint_t mem_word_bytes; + dml_uint_t num_dcc_mcaches; + dml_uint_t mcache_size_bytes; + dml_uint_t mcache_line_size_bytes; + + struct { + dml_bool_t UseNewDCN401SOCParameters; + dml_uint_t df_qos_response_time_fclk_cycles; + dml_uint_t max_round_trip_to_furthest_cs_fclk_cycles; + dml_uint_t mall_overhead_fclk_cycles; + dml_uint_t meta_trip_adder_fclk_cycles; + dml_uint_t average_transport_distance_fclk_cycles; + dml_float_t umc_urgent_ramp_latency_margin; + dml_float_t umc_max_latency_margin; + dml_float_t umc_average_latency_margin; + dml_float_t fabric_max_transport_latency_margin; + dml_float_t fabric_average_transport_latency_margin; + } dml_dcn401_soc_qos_params; + }; struct ip_params_st { @@ -515,6 +567,10 @@ struct dml_plane_cfg_st { dml_uint_t CursorWidth[__DML_NUM_PLANES__]; dml_uint_t CursorBPP[__DML_NUM_PLANES__]; + dml_bool_t setup_for_tdlut[__DML_NUM_PLANES__]; + enum dml2_tdlut_addressing_mode tdlut_addressing_mode[__DML_NUM_PLANES__]; + enum dml2_tdlut_width_mode tdlut_width_mode[__DML_NUM_PLANES__]; + enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[__DML_NUM_PLANES__]; enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[__DML_NUM_PLANES__]; @@ -604,6 +660,17 @@ struct dml_hw_resource_st { dml_float_t DLGRefClkFreqMHz; /// <brief DLG Global Reference timer }; +/// @brief To control the clk usage for model programming +struct dml_clk_cfg_st { + enum dml_clk_cfg_policy dcfclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as 
calculated by DML or use the test-specific freq + enum dml_clk_cfg_policy dispclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq + enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__]; + + dml_float_t dcfclk_mhz; + dml_float_t dispclk_mhz; + dml_float_t dppclk_mhz[__DML_NUM_PLANES__]; +}; // dml_clk_cfg_st + /// @brief DML display configuration. /// Describe how to display a surface in multi-plane setup and output to different output and writeback using the specified timgin struct dml_display_cfg_st { @@ -616,19 +683,9 @@ struct dml_display_cfg_st { unsigned int num_timings; struct dml_hw_resource_st hw; //< brief for mode programming + struct dml_clk_cfg_st clk_overrides; //< brief for mode programming clk override }; // dml_display_cfg_st -/// @brief To control the clk usage for model programming -struct dml_clk_cfg_st { - enum dml_clk_cfg_policy dcfclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq - enum dml_clk_cfg_policy dispclk_option; ///< brief Use for mode_program; user can select between use the min require clk req as calculated by DML or use the test-specific freq - enum dml_clk_cfg_policy dppclk_option[__DML_NUM_PLANES__]; - - dml_float_t dcfclk_freq_mhz; - dml_float_t dispclk_freq_mhz; - dml_float_t dppclk_freq_mhz[__DML_NUM_PLANES__]; -}; // dml_clk_cfg_st - /// @brief DML mode evaluation and programming policy /// Those knobs that affect mode support and mode programming struct dml_mode_eval_policy_st { @@ -884,11 +941,11 @@ struct mode_support_st { dml_uint_t meta_row_height[__DML_NUM_PLANES__]; dml_uint_t meta_row_height_chroma[__DML_NUM_PLANES__]; dml_float_t UrgLatency; - dml_float_t UrgentBurstFactorCursor[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorCursor[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorCursorPre[__DML_NUM_PLANES__]; - dml_float_t UrgentBurstFactorLuma[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorLuma[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorLumaPre[__DML_NUM_PLANES__]; - dml_float_t UrgentBurstFactorChroma[__DML_NUM_PLANES__]; + dml_float_t UrgentBurstFactorChroma[2][__DML_NUM_PLANES__]; dml_float_t UrgentBurstFactorChromaPre[__DML_NUM_PLANES__]; dml_float_t MaximumSwathWidthInLineBufferLuma; dml_float_t MaximumSwathWidthInLineBufferChroma; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c index c247aee89caf..89890c88fd66 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_util.c @@ -690,12 +690,12 @@ __DML_DLL_EXPORT__ void dml_print_clk_cfg(const struct dml_clk_cfg_st *clk_cfg) dml_print("DML: clk_cfg: dcfclk_option = %d\n", clk_cfg->dcfclk_option); dml_print("DML: clk_cfg: dispclk_option = %d\n", clk_cfg->dispclk_option); - dml_print("DML: clk_cfg: dcfclk_freq_mhz = %f\n", clk_cfg->dcfclk_freq_mhz); - dml_print("DML: clk_cfg: dispclk_freq_mhz = %f\n", clk_cfg->dispclk_freq_mhz); + dml_print("DML: clk_cfg: dcfclk_mhz = %f\n", clk_cfg->dcfclk_mhz); + dml_print("DML: clk_cfg: dispclk_mhz = %f\n", clk_cfg->dispclk_mhz); for (dml_uint_t i = 0; i < DCN_DML__NUM_PLANE; i++) { dml_print("DML: clk_cfg: i=%d, dppclk_option = %d\n", i, clk_cfg->dppclk_option[i]); - dml_print("DML: clk_cfg: i=%d, dppclk_freq_mhz = %f\n", i, clk_cfg->dppclk_freq_mhz[i]); + dml_print("DML: clk_cfg: i=%d, 
dppclk_mhz = %f\n", i, clk_cfg->dppclk_mhz[i]); } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c index c6a5a8614679..efb099905496 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c @@ -1077,6 +1077,8 @@ void dml21_copy_clocks_to_dc_state(struct dml2_context *in_ctx, struct dc_state context->bw_ctx.bw.dcn.clk.dtbclk_en = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz > 0; context->bw_ctx.bw.dcn.clk.ref_dtbclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.dtbrefclk_khz; context->bw_ctx.bw.dcn.clk.socclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.socclk_khz; + context->bw_ctx.bw.dcn.clk.subvp_prefetch_dramclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz; + context->bw_ctx.bw.dcn.clk.subvp_prefetch_fclk_khz = in_ctx->v21.mode_programming.programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz; } void dml21_extract_legacy_watermark_set(const struct dc *in_dc, struct dcn_watermarks *watermark, enum dml2_dchub_watermark_reg_set_index reg_set_idx, struct dml2_context *in_ctx) @@ -1226,22 +1228,22 @@ void dml21_set_dc_p_state_type( bool sub_vp_enabled) { switch (stream_programming->uclk_pstate_method) { - case dml2_uclk_pstate_support_method_vactive: - case dml2_uclk_pstate_support_method_fw_vactive_drr: + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: pipe_ctx->p_state_type = P_STATE_V_ACTIVE; break; - case dml2_uclk_pstate_support_method_vblank: - case dml2_uclk_pstate_support_method_fw_vblank_drr: + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: if (sub_vp_enabled) pipe_ctx->p_state_type = P_STATE_V_BLANK_SUB_VP; else pipe_ctx->p_state_type = P_STATE_V_BLANK; break; - case dml2_uclk_pstate_support_method_fw_subvp_phantom: - case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: pipe_ctx->p_state_type = P_STATE_SUB_VP; break; - case dml2_uclk_pstate_support_method_fw_drr: + case dml2_pstate_method_fw_drr: if (sub_vp_enabled) pipe_ctx->p_state_type = P_STATE_DRR_SUB_VP; else diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c index 51d491bffa32..cb966f8d3216 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_utils.c @@ -482,7 +482,8 @@ void dml21_build_fams2_programming(const struct dc *dc, unsigned int num_fams2_streams = 0; /* reset fams2 data */ - memset(&context->bw_ctx.bw.dcn.fams2_stream_params, 0, sizeof(struct dmub_fams2_stream_static_state) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_base_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); + memset(&context->bw_ctx.bw.dcn.fams2_stream_sub_params, 0, sizeof(union dmub_cmd_fams2_config) * DML2_MAX_PLANES); memset(&context->bw_ctx.bw.dcn.fams2_global_config, 0, sizeof(struct dmub_cmd_fams2_global_config)); if (dml_ctx->v21.mode_programming.programming->fams2_required) { @@ -490,8 +491,10 @@ void dml21_build_fams2_programming(const struct dc *dc, int dml_stream_idx; struct dc_stream_state *phantom_stream; struct dc_stream_status *phantom_status; + enum fams2_stream_type type = 0; - struct 
dmub_fams2_stream_static_state *static_state = &context->bw_ctx.bw.dcn.fams2_stream_params[num_fams2_streams]; + union dmub_cmd_fams2_config *static_base_state = &context->bw_ctx.bw.dcn.fams2_stream_base_params[num_fams2_streams]; + union dmub_cmd_fams2_config *static_sub_state = &context->bw_ctx.bw.dcn.fams2_stream_sub_params[num_fams2_streams]; struct dc_stream_state *stream = context->streams[i]; @@ -508,28 +511,38 @@ void dml21_build_fams2_programming(const struct dc *dc, } /* copy static state from PMO */ - memcpy(static_state, - &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_params, - sizeof(struct dmub_fams2_stream_static_state)); - - /* get information from context */ - static_state->num_planes = context->stream_status[i].plane_count; - static_state->otg_inst = context->stream_status[i].primary_otg_inst; - - /* populate pipe masks for planes */ - for (j = 0; j < context->stream_status[i].plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { - static_state->pipe_mask |= (1 << k); - static_state->plane_pipe_masks[j] |= (1 << k); + memcpy(static_base_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_base_params, + sizeof(union dmub_cmd_fams2_config)); + memcpy(static_sub_state, + &dml_ctx->v21.mode_programming.programming->stream_programming[dml_stream_idx].fams2_sub_params, + sizeof(union dmub_cmd_fams2_config)); + + switch (dc->debug.fams_version.minor) { + case 1: + default: + type = static_base_state->stream_v1.base.type; + + /* get information from context */ + static_base_state->stream_v1.base.num_planes = context->stream_status[i].plane_count; + static_base_state->stream_v1.base.otg_inst = context->stream_status[i].primary_otg_inst; + + /* populate pipe masks for planes */ + for (j = 0; j < context->stream_status[i].plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == context->stream_status[i].plane_states[j]) { + static_base_state->stream_v1.base.pipe_mask |= (1 << k); + static_base_state->stream_v1.base.plane_pipe_masks[j] |= (1 << k); + } } } } + /* get per method programming */ - switch (static_state->type) { + switch (type) { case FAMS2_STREAM_TYPE_VBLANK: case FAMS2_STREAM_TYPE_VACTIVE: case FAMS2_STREAM_TYPE_DRR: @@ -543,16 +556,27 @@ void dml21_build_fams2_programming(const struct dc *dc, /* phantom status should always be present */ ASSERT(phantom_status); - static_state->sub_state.subvp.phantom_otg_inst = phantom_status->primary_otg_inst; + if (!phantom_status) + break; - /* populate pipe masks for phantom planes */ - for (j = 0; j < phantom_status->plane_count; j++) { - for (k = 0; k < dc->res_pool->pipe_count; k++) { - if (context->res_ctx.pipe_ctx[k].stream && - context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && - context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { - static_state->sub_state.subvp.phantom_pipe_mask |= (1 << k); - static_state->sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + switch (dc->debug.fams_version.minor) { + case 1: + default: + static_sub_state->stream_v1.sub_state.subvp.phantom_otg_inst = 
phantom_status->primary_otg_inst; + + /* populate pipe masks for phantom planes */ + for (j = 0; j < phantom_status->plane_count; j++) { + for (k = 0; k < dc->res_pool->pipe_count; k++) { + if (context->res_ctx.pipe_ctx[k].stream && + context->res_ctx.pipe_ctx[k].stream->stream_id == phantom_stream->stream_id && + context->res_ctx.pipe_ctx[k].plane_state == phantom_status->plane_states[j]) { + switch (dc->debug.fams_version.minor) { + case 1: + default: + static_sub_state->stream_v1.sub_state.subvp.phantom_pipe_mask |= (1 << k); + static_sub_state->stream_v1.sub_state.subvp.phantom_plane_pipe_masks[j] |= (1 << k); + } + } } } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h index 8ef7977841de..793e1c038efd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/bounding_boxes/dcn4_soc_bb.h @@ -344,6 +344,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .config_return_buffer_segment_size_in_kbytes = 64, .meta_fifo_size_in_kentries = 22, .compressed_buffer_segment_size_in_kbytes = 64, + .cursor_buffer_size = 24, .max_flip_time_us = 80, .max_flip_time_lines = 32, .hostvm_mode = 0, @@ -354,7 +355,7 @@ static const struct dml2_ip_capabilities dml2_dcn401_max_ip_caps = { .fams2 = { .max_allow_delay_us = 100 * 1000, - .scheduling_delay_us = 125, + .scheduling_delay_us = 550, .vertical_interrupt_ack_delay_us = 40, .allow_programming_delay_us = 18, .min_allow_width_us = 20, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h index b132f676a68d..5e1ab6d97640 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_display_cfg_types.h @@ -10,9 +10,10 @@ #define DML2_MAX_PLANES 8 #define DML2_MAX_DCN_PIPES 8 #define DML2_MAX_MCACHES 8 // assume plane is going to be supported by a max of 8 mcaches +#define DML2_MAX_WRITEBACK 3 enum dml2_swizzle_mode { - dml2_sw_linear, + dml2_sw_linear, // SW_LINEAR accepts 256 byte aligned pitch and also 128 byte aligned pitch if DCC is not enabled dml2_sw_256b_2d, dml2_sw_4kb_2d, dml2_sw_64kb_2d, @@ -24,7 +25,8 @@ enum dml2_swizzle_mode { dml2_gfx11_sw_64kb_d_x, dml2_gfx11_sw_64kb_r_x, dml2_gfx11_sw_256kb_d_x, - dml2_gfx11_sw_256kb_r_x + dml2_gfx11_sw_256kb_r_x, + }; enum dml2_source_format_class { @@ -38,7 +40,13 @@ enum dml2_source_format_class { dml2_rgbe_alpha = 9, dml2_rgbe = 10, dml2_mono_8 = 11, - dml2_mono_16 = 12 + dml2_mono_16 = 12, + dml2_422_planar_8 = 13, + dml2_422_planar_10 = 14, + dml2_422_planar_12 = 15, + dml2_422_packed_8 = 16, + dml2_422_packed_10 = 17, + dml2_422_packed_12 = 18 }; enum dml2_rotation_angle { @@ -121,15 +129,6 @@ enum dml2_dsc_enable_option { dml2_dsc_enable_if_necessary = 2 }; -enum dml2_pstate_support_method { - dml2_pstate_method_uninitialized, - dml2_pstate_method_not_supported, - dml2_pstate_method_vactive, - dml2_pstate_method_vblank, - dml2_pstate_method_svp, - dml2_pstate_method_drr -}; - enum dml2_tdlut_addressing_mode { dml2_tdlut_sw_linear = 0, dml2_tdlut_simple_linear = 1 @@ -287,22 +286,23 @@ struct dml2_link_output_cfg { bool validate_output; // Do not validate the link configuration for this display stream. 
}; -struct dml2_writeback_cfg { - bool enable; +struct dml2_writeback_info { enum dml2_source_format_class pixel_format; - unsigned int active_writebacks_per_surface; + unsigned long input_width; + unsigned long input_height; + unsigned long output_width; + unsigned long output_height; + unsigned long v_taps; + unsigned long h_taps; + unsigned long v_taps_chroma; + unsigned long h_taps_chroma; + double h_ratio; + double v_ratio; +}; - struct { - bool enabled; - unsigned long input_width; - unsigned long input_height; - unsigned long output_width; - unsigned long output_height; - unsigned long v_taps; - unsigned long h_taps; - double h_ratio; - double v_ratio; - } scaling_info; +struct dml2_writeback_cfg { + unsigned int active_writebacks_per_stream; + struct dml2_writeback_info writeback_stream[DML2_MAX_WRITEBACK]; }; struct dml2_plane_parameters { diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h index ebd8abe894a9..5f0bc42d1d2f 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_soc_parameter_types.h @@ -167,11 +167,13 @@ struct dml2_ip_capabilities { unsigned int max_num_dp2p0_streams; unsigned int max_num_hdmi_frl_outputs; unsigned int max_num_dp2p0_outputs; + unsigned int max_num_wb; unsigned int rob_buffer_size_kbytes; unsigned int config_return_buffer_size_in_kbytes; unsigned int config_return_buffer_segment_size_in_kbytes; unsigned int meta_fifo_size_in_kentries; unsigned int compressed_buffer_segment_size_in_kbytes; + unsigned int cursor_buffer_size; unsigned int max_flip_time_us; unsigned int max_flip_time_lines; unsigned int hostvm_mode; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h index eeb96c455658..d2d053f2354d 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/inc/dml_top_types.h @@ -26,20 +26,14 @@ enum dml2_project_id { dml2_project_dcn4x_stage2_auto_drr_svp = 3, }; -enum dml2_dram_clock_change_support { - dml2_dram_clock_change_vactive = 0, - dml2_dram_clock_change_vblank = 1, - dml2_dram_clock_change_vblank_and_vactive = 2, - dml2_dram_clock_change_drr = 3, - dml2_dram_clock_change_mall_svp = 4, - dml2_dram_clock_change_mall_full_frame = 6, - dml2_dram_clock_change_unsupported = 7 -}; - -enum dml2_fclock_change_support { - dml2_fclock_change_vactive = 0, - dml2_fclock_change_vblank = 1, - dml2_fclock_change_unsupported = 2 +enum dml2_pstate_change_support { + dml2_pstate_change_vactive = 0, + dml2_pstate_change_vblank = 1, + dml2_pstate_change_vblank_and_vactive = 2, + dml2_pstate_change_drr = 3, + dml2_pstate_change_mall_svp = 4, + dml2_pstate_change_mall_full_frame = 6, + dml2_pstate_change_unsupported = 7 }; enum dml2_output_type_and_rate__type { @@ -202,24 +196,23 @@ struct dml2_mcache_surface_allocation { } informative; }; -enum dml2_uclk_pstate_support_method { - dml2_uclk_pstate_support_method_not_supported = 0, - /* hw */ - dml2_uclk_pstate_support_method_vactive = 1, - dml2_uclk_pstate_support_method_vblank = 2, - dml2_uclk_pstate_support_method_reserved_hw = 5, - /* fw */ - dml2_uclk_pstate_support_method_fw_subvp_phantom = 6, - dml2_uclk_pstate_support_method_reserved_fw = 10, - /* fw w/drr */ - dml2_uclk_pstate_support_method_fw_vactive_drr = 11, - 
dml2_uclk_pstate_support_method_fw_vblank_drr = 12, - dml2_uclk_pstate_support_method_fw_subvp_phantom_drr = 13, - dml2_uclk_pstate_support_method_reserved_fw_drr_fixed = 20, - dml2_uclk_pstate_support_method_fw_drr = 21, - dml2_uclk_pstate_support_method_reserved_fw_drr_var = 22, - - dml2_uclk_pstate_support_method_count +enum dml2_pstate_method { + dml2_pstate_method_na = 0, + /* hw exclusive modes */ + dml2_pstate_method_vactive = 1, + dml2_pstate_method_vblank = 2, + dml2_pstate_method_reserved_hw = 5, + /* fw assisted exclusive modes */ + dml2_pstate_method_fw_svp = 6, + dml2_pstate_method_reserved_fw = 10, + /* fw assisted modes requiring drr modulation */ + dml2_pstate_method_fw_vactive_drr = 11, + dml2_pstate_method_fw_vblank_drr = 12, + dml2_pstate_method_fw_svp_drr = 13, + dml2_pstate_method_reserved_fw_drr_clamped = 20, + dml2_pstate_method_fw_drr = 21, + dml2_pstate_method_reserved_fw_drr_var = 22, + dml2_pstate_method_count }; struct dml2_per_plane_programming { @@ -241,7 +234,7 @@ struct dml2_per_plane_programming { // If a stream is using odm split, then this value is always 1 unsigned int num_dpps_required; - enum dml2_uclk_pstate_support_method uclk_pstate_support_method; + enum dml2_pstate_method uclk_pstate_support_method; // MALL size requirements for MALL SS and SubVP unsigned int surface_size_mall_bytes; @@ -281,7 +274,7 @@ struct dml2_per_stream_programming { unsigned int num_odms_required; - enum dml2_uclk_pstate_support_method uclk_pstate_method; + enum dml2_pstate_method uclk_pstate_method; struct { bool enabled; @@ -289,7 +282,8 @@ struct dml2_per_stream_programming { union dml2_global_sync_programming global_sync; } phantom_stream; - struct dmub_fams2_stream_static_state fams2_params; + union dmub_cmd_fams2_config fams2_base_params; + union dmub_cmd_fams2_config fams2_sub_params; }; //----------------- @@ -339,7 +333,7 @@ struct dml2_mode_support_info { bool DCCMetaBufferSizeNotExceeded; bool TotalVerticalActiveBandwidthSupport; bool VActiveBandwidthSupport; - enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; bool USRRetrainingSupport; bool PrefetchSupported; bool DynamicMetadataSupported; @@ -361,6 +355,7 @@ struct dml2_mode_support_info { unsigned int AlignedYPitch[DML2_MAX_PLANES]; unsigned int AlignedCPitch[DML2_MAX_PLANES]; bool g6_temp_read_support; + bool temp_read_or_ppt_support; }; // dml2_mode_support_info struct dml2_display_cfg_programming { @@ -392,6 +387,11 @@ struct dml2_display_cfg_programming { unsigned long fclk_khz; unsigned long dcfclk_khz; } svp_prefetch; + struct { + unsigned long uclk_khz; + unsigned long fclk_khz; + unsigned long dcfclk_khz; + } svp_prefetch_no_throttle; unsigned long deepsleep_dcfclk_khz; unsigned long dispclk_khz; @@ -444,7 +444,7 @@ struct dml2_display_cfg_programming { double pstate_change_us; double fclk_pstate_change_us; double usr_retraining_us; - double g6_temp_read_watermark_us; + double temp_read_or_ppt_watermark_us; } watermarks; struct { @@ -653,6 +653,7 @@ struct dml2_display_cfg_programming { double DisplayPipeLineDeliveryTimeLumaPrefetch[DML2_MAX_PLANES]; double DisplayPipeLineDeliveryTimeChromaPrefetch[DML2_MAX_PLANES]; + double WritebackRequiredBandwidth; double WritebackAllowDRAMClockChangeEndPosition[DML2_MAX_PLANES]; double WritebackAllowFCLKChangeEndPosition[DML2_MAX_PLANES]; double DSCCLK_calculated[DML2_MAX_PLANES]; @@ -662,6 +663,7 @@ struct dml2_display_cfg_programming { double 
MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; unsigned int PrefetchMode[DML2_MAX_PLANES]; // LEGACY_ONLY bool ROBUrgencyAvoidance; + double LowestPrefetchMargin; } misc; struct dml2_mode_support_info mode_support_info; @@ -675,6 +677,7 @@ struct dml2_display_cfg_programming { bool failed_mcache_validation; bool failed_dpmm; bool failed_mode_programming; + bool failed_map_watermarks; } informative; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c index 3d41ffde91c1..d68b4567e218 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4.c @@ -9,7 +9,7 @@ #include "dml2_debug.h" #include "lib_float_math.h" -static const struct dml2_core_ip_params core_dcn4_ip_caps_base = { +struct dml2_core_ip_params core_dcn4_ip_caps_base = { // Hardcoded values for DCN3x .vblank_nom_default_us = 668, .remote_iommu_outstanding_translations = 256, @@ -90,6 +90,7 @@ static void patch_ip_caps_with_explicit_ip_params(struct dml2_ip_capabilities *i ip_caps->config_return_buffer_segment_size_in_kbytes = ip_params->config_return_buffer_segment_size_in_kbytes; ip_caps->meta_fifo_size_in_kentries = ip_params->meta_fifo_size_in_kentries; ip_caps->compressed_buffer_segment_size_in_kbytes = ip_params->compressed_buffer_segment_size_in_kbytes; + ip_caps->cursor_buffer_size = ip_params->cursor_buffer_size; ip_caps->max_flip_time_us = ip_params->max_flip_time_us; ip_caps->max_flip_time_lines = ip_params->max_flip_time_lines; ip_caps->hostvm_mode = ip_params->hostvm_mode; @@ -114,6 +115,7 @@ static void patch_ip_params_with_ip_caps(struct dml2_core_ip_params *ip_params, ip_params->config_return_buffer_segment_size_in_kbytes = ip_caps->config_return_buffer_segment_size_in_kbytes; ip_params->meta_fifo_size_in_kentries = ip_caps->meta_fifo_size_in_kentries; ip_params->compressed_buffer_segment_size_in_kbytes = ip_caps->compressed_buffer_segment_size_in_kbytes; + ip_params->cursor_buffer_size = ip_caps->cursor_buffer_size; ip_params->max_flip_time_us = ip_caps->max_flip_time_us; ip_params->max_flip_time_lines = ip_caps->max_flip_time_lines; ip_params->hostvm_mode = ip_caps->hostvm_mode; @@ -316,28 +318,9 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in // Setup the appropriate p-state strategy if (display_cfg->stage3.performed && display_cfg->stage3.success) { - switch (display_cfg->stage3.pstate_switch_modes[plane_index]) { - case dml2_uclk_pstate_support_method_vactive: - case dml2_uclk_pstate_support_method_vblank: - case dml2_uclk_pstate_support_method_fw_subvp_phantom: - case dml2_uclk_pstate_support_method_fw_drr: - case dml2_uclk_pstate_support_method_fw_vactive_drr: - case dml2_uclk_pstate_support_method_fw_vblank_drr: - case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: - programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index]; - break; - case dml2_uclk_pstate_support_method_reserved_hw: - case dml2_uclk_pstate_support_method_reserved_fw: - case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed: - case dml2_uclk_pstate_support_method_reserved_fw_drr_var: - case dml2_uclk_pstate_support_method_not_supported: - case dml2_uclk_pstate_support_method_count: - default: - programming->plane_programming[plane_index].uclk_pstate_support_method = 
dml2_uclk_pstate_support_method_not_supported; - break; - } + programming->plane_programming[plane_index].uclk_pstate_support_method = display_cfg->stage3.pstate_switch_modes[plane_index]; } else { - programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_na; } dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); @@ -360,7 +343,8 @@ static void pack_mode_programming_params_with_implicit_subvp(struct dml2_core_in /* unconditionally populate fams2 params */ dml2_core_calcs_get_stream_fams2_programming(&core->clean_me_up.mode_lib, display_cfg, - &programming->stream_programming[main_plane->stream_index].fams2_params, + &programming->stream_programming[main_plane->stream_index].fams2_base_params, + &programming->stream_programming[main_plane->stream_index].fams2_sub_params, programming->stream_programming[main_plane->stream_index].uclk_pstate_method, plane_index); @@ -572,18 +556,18 @@ bool core_dcn4_mode_programming(struct dml2_core_mode_programming_in_out *in_out in_out->programming->plane_programming[plane_index].num_dpps_required = core->clean_me_up.mode_lib.mp.NoOfDPP[plane_index]; if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_main_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; else if (in_out->programming->display_config.plane_descriptors[plane_index].overrides.legacy_svp_config == dml2_svp_mode_override_phantom_pipe_no_data_return) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_fw_subvp_phantom; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_fw_svp; else { if (core->clean_me_up.mode_lib.mp.MaxActiveDRAMClockChangeLatencySupported[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vactive; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vactive; else if (core->clean_me_up.mode_lib.mp.TWait[plane_index] >= core->clean_me_up.mode_lib.soc.power_management_parameters.dram_clk_change_blackout_us) - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_vblank; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_pstate_method_vblank; else - in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = dml2_uclk_pstate_support_method_not_supported; + in_out->programming->plane_programming[plane_index].uclk_pstate_support_method = 
dml2_pstate_method_na; } dml2_core_calcs_get_mall_allocation(&core->clean_me_up.mode_lib, &in_out->programming->plane_programming[plane_index].surface_size_mall_bytes, dml_internal_pipe_index); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c index 601320b1be81..b9ec243cf9ba 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.c @@ -11,6 +11,9 @@ #define DML2_MAX_FMT_420_BUFFER_WIDTH 4096 #define DML_MAX_NUM_OF_SLICES_PER_DSC 4 +#define DML_MAX_COMPRESSION_RATIO 4 +//#define DML_MODE_SUPPORT_USE_DPM_DRAM_BW +//#define DML_GLOBAL_PREFETCH_CHECK #define ALLOW_SDPIF_RATE_LIMIT_PRE_CSTATE const char *dml2_core_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type) @@ -132,9 +135,9 @@ static void dml2_print_mode_support_info(const struct dml2_core_internal_mode_su dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); - if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); @@ -315,12 +318,11 @@ dml_get_var_func(meta_trip_memory_us, double, mode_lib->mp.MetaTripToMemory); dml_get_var_func(wm_fclk_change, double, mode_lib->mp.Watermark.FCLKChangeWatermark); dml_get_var_func(wm_usr_retraining, double, mode_lib->mp.Watermark.USRRetrainingWatermark); -dml_get_var_func(wm_g6_temp_read, double, mode_lib->mp.Watermark.g6_temp_read_watermark_us); +dml_get_var_func(wm_temp_read_or_ppt, double, mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us); dml_get_var_func(wm_dram_clock_change, double, mode_lib->mp.Watermark.DRAMClockChangeWatermark); dml_get_var_func(fraction_of_urgent_bandwidth, double, mode_lib->mp.FractionOfUrgentBandwidth); dml_get_var_func(fraction_of_urgent_bandwidth_imm_flip, double, mode_lib->mp.FractionOfUrgentBandwidthImmediateFlip); dml_get_var_func(fraction_of_urgent_bandwidth_mall, double, mode_lib->mp.FractionOfUrgentBandwidthMALL); -dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency); dml_get_var_func(wm_writeback_dram_clock_change, double, mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); dml_get_var_func(wm_writeback_fclk_change, double, mode_lib->mp.Watermark.WritebackFCLKChangeWatermark); dml_get_var_func(stutter_efficiency, double, mode_lib->mp.StutterEfficiency); @@ -355,7 +357,9 @@ dml_get_var_func(svp_prefetch_urg_bw_available_sdp, double, mode_lib->mp.urg_ban dml_get_var_func(svp_prefetch_urg_bw_available_dram, double, mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_svp_prefetch][dml2_core_internal_bw_dram]); dml_get_var_func(svp_prefetch_urg_bw_available_dram_vm_only, double, mode_lib->mp.urg_bandwidth_available_vm_only[dml2_core_internal_soc_state_svp_prefetch]); 
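/*
 * Editor's illustration (not part of this patch): the core_dcn4_mode_programming()
 * hunk above replaces the old dml2_uclk_pstate_support_method_* values with the
 * consolidated dml2_pstate_method_* enum.  The fallback selection it performs can
 * be summarised by the sketch below; the function and parameter names here are
 * invented for clarity — only the decision order mirrors the code in the hunk.
 */
static enum dml2_pstate_method pick_uclk_pstate_method_sketch(
		bool forced_svp,                          /* any legacy_svp_config override (main/phantom pipe) */
		double max_active_dram_change_latency_us, /* MaxActiveDRAMClockChangeLatencySupported[plane]    */
		double twait_us,                          /* TWait[plane]                                       */
		double dram_blackout_us)                  /* dram_clk_change_blackout_us                        */
{
	if (forced_svp)
		return dml2_pstate_method_fw_svp;    /* SubVP forced by the legacy override      */
	if (max_active_dram_change_latency_us >= dram_blackout_us)
		return dml2_pstate_method_vactive;   /* blackout hidden entirely within vactive  */
	if (twait_us >= dram_blackout_us)
		return dml2_pstate_method_vblank;    /* otherwise hide it in vblank if TWait allows */
	return dml2_pstate_method_na;                /* no p-state method available for this plane  */
}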
+dml_get_var_func(urgent_latency, double, mode_lib->mp.UrgentLatency); dml_get_var_func(max_urgent_latency_us, double, mode_lib->ms.support.max_urgent_latency_us); +dml_get_var_func(max_non_urgent_latency_us, double, mode_lib->ms.support.max_non_urgent_latency_us); dml_get_var_func(avg_non_urgent_latency_us, double, mode_lib->ms.support.avg_non_urgent_latency_us); dml_get_var_func(avg_urgent_latency_us, double, mode_lib->ms.support.avg_urgent_latency_us); @@ -466,6 +470,24 @@ static bool dml_is_420(enum dml2_source_format_class source_format) case dml2_420_12: val = 1; break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; case dml2_rgbe_alpha: val = 0; break; @@ -487,32 +509,31 @@ static bool dml_is_420(enum dml2_source_format_class source_format) static unsigned int dml_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) { - switch (sw_mode) { - case (dml2_sw_linear): - return 256; break; - case (dml2_sw_256b_2d): - return 256; break; - case (dml2_sw_4kb_2d): - return 4096; break; - case (dml2_sw_64kb_2d): - return 65536; break; - case (dml2_sw_256kb_2d): - return 262144; break; - case (dml2_gfx11_sw_linear): - return 256; break; - case (dml2_gfx11_sw_64kb_d): - return 65536; break; - case (dml2_gfx11_sw_64kb_d_t): - return 65536; break; - case (dml2_gfx11_sw_64kb_d_x): - return 65536; break; - case (dml2_gfx11_sw_64kb_r_x): - return 65536; break; - case (dml2_gfx11_sw_256kb_d_x): - return 262144; break; - case (dml2_gfx11_sw_256kb_r_x): - return 262144; break; - default: + if (sw_mode == dml2_sw_linear) + return 256; + else if (sw_mode == dml2_sw_256b_2d) + return 256; + else if (sw_mode == dml2_sw_4kb_2d) + return 4096; + else if (sw_mode == dml2_sw_64kb_2d) + return 65536; + else if (sw_mode == dml2_sw_256kb_2d) + return 262144; + else if (sw_mode == dml2_gfx11_sw_linear) + return 256; + else if (sw_mode == dml2_gfx11_sw_64kb_d) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_t) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_r_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_256kb_d_x) + return 262144; + else if (sw_mode == dml2_gfx11_sw_256kb_r_x) + return 262144; + else { DML2_ASSERT(0); return 256; } @@ -820,7 +841,7 @@ static void CalculateSwathWidth( // Output unsigned int req_per_swath_ub_l[], unsigned int req_per_swath_ub_c[], - unsigned int SwathWidthSingleDPPY[], + unsigned int SwathWidthSingleDPPY[], // post-rotated plane width unsigned int SwathWidthSingleDPPC[], unsigned int SwathWidthY[], // per-pipe unsigned int SwathWidthC[], // per-pipe @@ -1403,7 +1424,6 @@ static unsigned int dscceComputeDelay( // N422/N420 operate at 2 pixels per clock unsigned int pixelsPerClock, padding_pixels, ssm_group_priming_delay, ssm_pipeline_delay, obsm_pipeline_delay, slice_padded_pixels, ixd_plus_padding, ixd_plus_padding_groups, cycles_per_group, group_delay, pipeline_delay, pixels, additional_group_delay, lines_to_reach_ixd, groups_to_reach_ixd, slice_width_groups, initial_xmit_delay, number_of_lines_to_reach_ixd, slice_width_modified; - if (pixelFormat == dml2_420) pixelsPerClock = 2; // #all other modes operate at 1 pixel per clock @@ -1428,7 +1448,6 @@ static unsigned int dscceComputeDelay( } } - //sub-stream multiplexer balance fifo priming delay in groups as 
per dsc standard if (bpc == 8) ssm_group_priming_delay = 83; @@ -1447,9 +1466,6 @@ static unsigned int dscceComputeDelay( //determine number of padded pixels in the last group of a slice line, computed as slice_padded_pixels = 3 * slice_width_groups - slice_width_modified; - - - //determine integer number of complete slice lines required to reach initial transmit delay without ssm delay considered number_of_lines_to_reach_ixd = initial_xmit_delay / slice_width_modified; @@ -1463,7 +1479,6 @@ static unsigned int dscceComputeDelay( //number of groups required for a slice to reach initial transmit delay is the sum of the padded initial transmit delay plus the ssm group priming delay groups_to_reach_ixd = ixd_plus_padding_groups + ssm_group_priming_delay; - //number of lines required to reach padded initial transmit delay in groups in slices to the left of the last horizontal slice //needs to be rounded up as a complete slice lines are buffered prior to initial transmit delay being reached in the last horizontal slice lines_to_reach_ixd = (groups_to_reach_ixd + slice_width_groups - 1) / slice_width_groups; //round up lines to reach ixd to next @@ -1506,7 +1521,6 @@ static unsigned int dscceComputeDelay( return pixels; } - //updated in dcn4 static unsigned int dscComputeDelay(enum dml2_output_format_class pixelFormat, enum dml2_output_encoder_class Output) { @@ -2090,7 +2104,6 @@ static void CalculateDCCConfiguration( yuv420 = 1; else yuv420 = 0; - horz_div_l = 1; horz_div_c = 1; vert_div_l = 1; @@ -2561,8 +2574,7 @@ static void calculate_mcache_setting( if (*p->num_mcaches_l) { l->avg_mcache_element_size_l = l->meta_row_width_l / *p->num_mcaches_l; } - - if (l->is_dual_plane && *p->num_mcaches_c) { + if (l->is_dual_plane) { l->avg_mcache_element_size_c = l->meta_row_width_c / *p->num_mcaches_c; if (!p->imall_enable || (*p->mall_comb_mcache_l == *p->mall_comb_mcache_c)) { @@ -2682,12 +2694,12 @@ static double dml_get_return_bandwidth_available( bool is_avg_bw, bool is_hvm_en, bool is_hvm_only, - double dcflk_mhz, + double dcfclk_mhz, double fclk_mhz, double dram_bw_mbps) { double return_bw_mbps = 0.; - double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcflk_mhz; + double ideal_sdp_bandwidth = (double)soc->return_bus_width_bytes * dcfclk_mhz; double ideal_fabric_bandwidth = fclk_mhz * (double)soc->fabric_datapath_to_dcn_data_return_bytes; double ideal_dram_bandwidth = dram_bw_mbps; //dram_speed_mts * soc->clk_table.dram_config.channel_count * soc->clk_table.dram_config.channel_width_bytes; @@ -2753,7 +2765,7 @@ static double dml_get_return_bandwidth_available( dml2_printf("DML::%s: is_hvm_only = %u\n", __func__, is_hvm_only); dml2_printf("DML::%s: state_type = %s\n", __func__, dml2_core_internal_soc_state_type_str(state_type)); dml2_printf("DML::%s: bw_type = %s\n", __func__, dml2_core_internal_bw_type_str(bw_type)); - dml2_printf("DML::%s: dcflk_mhz = %f\n", __func__, dcflk_mhz); + dml2_printf("DML::%s: dcfclk_mhz = %f\n", __func__, dcfclk_mhz); dml2_printf("DML::%s: fclk_mhz = %f\n", __func__, fclk_mhz); dml2_printf("DML::%s: ideal_sdp_bandwidth = %f\n", __func__, ideal_sdp_bandwidth); dml2_printf("DML::%s: ideal_fabric_bandwidth = %f\n", __func__, ideal_fabric_bandwidth); @@ -3816,8 +3828,8 @@ static void CalculateSwathAndDETConfiguration(struct dml2_core_internal_scratch p->SwathHeightC[k] = MaximumSwathHeightC[k] / 2; RoundedUpSwathSizeBytesY[k] = p->full_swath_bytes_l[k] / 2; RoundedUpSwathSizeBytesC[k] = p->full_swath_bytes_c[k] / 2; - p->request_size_bytes_luma[k] = 
((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; - p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64; + p->request_size_bytes_luma[k] = ((p->BytePerPixY[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; + p->request_size_bytes_chroma[k] = ((p->BytePerPixC[k] == 2) == dml_is_vertical_rotation(p->display_cfg->plane_descriptors[k].composition.rotation_angle)) ? 128 : 64;; } if (p->SwathHeightC[k] == 0) @@ -5069,20 +5081,18 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->trip_to_mem = 0.0; *p->Tvm_trips = 0.0; *p->Tr0_trips = 0.0; - s->Tvm_no_trip_oto = 0.0; - s->Tr0_no_trip_oto = 0.0; s->Tvm_trips_rounded = 0.0; s->Tr0_trips_rounded = 0.0; s->max_Tsw = 0.0; s->Lsw_oto = 0.0; - s->Tpre_rounded = 0.0; + *p->Tpre_rounded = 0.0; s->prefetch_bw_equ = 0.0; s->Tvm_equ = 0.0; s->Tr0_equ = 0.0; s->Tdmbf = 0.0; s->Tdmec = 0.0; s->Tdmsks = 0.0; - s->prefetch_sw_bytes = 0.0; + *p->prefetch_sw_bytes = 0.0; s->prefetch_bw_pr = 0.0; s->bytes_pp = 0.0; s->dep_bytes = 0.0; @@ -5207,6 +5217,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: setup_for_tdlut = %u\n", __func__, p->setup_for_tdlut); dml2_printf("DML::%s: tdlut_opt_time = %f\n", __func__, p->tdlut_opt_time); dml2_printf("DML::%s: tdlut_pte_bytes_per_frame = %u\n", __func__, p->tdlut_pte_bytes_per_frame); + dml2_printf("DML::%s: tdlut_drain_time = %f\n", __func__, p->tdlut_drain_time); #endif if (p->OutputFormat == dml2_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP)) @@ -5277,23 +5288,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC; } - s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (double)p->myPipe->DPPPerSurface; - if (p->myPipe->VRatio < 1.0) - s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr; - s->max_Tsw = (math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime); - - s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; - s->prefetch_bw_pr = s->prefetch_bw_pr * p->mall_prefetch_sdp_overhead_factor; - s->prefetch_sw_bytes = s->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; - s->prefetch_bw_oto = math_max2(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw); - - s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; - s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); - s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); - - s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; - s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); - s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + *p->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC; + *p->prefetch_sw_bytes = *p->prefetch_sw_bytes * p->mall_prefetch_sdp_overhead_factor; vm_bytes = p->vm_bytes; // vm_bytes is dpde0_bytes_per_frame_ub_l + 
dpde0_bytes_per_frame_ub_c + 2*extra_dpde_bytes; extra_tdpe_bytes = (unsigned int)math_max2(0, (p->display_cfg->gpuvm_max_page_table_levels - 1) * 128); @@ -5302,57 +5298,103 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch vm_bytes = vm_bytes + p->tdlut_pte_bytes_per_frame + (p->display_cfg->gpuvm_enable ? extra_tdpe_bytes : 0); tdlut_row_bytes = (unsigned long) math_ceil2(p->tdlut_bytes_per_frame/2.0, 1.0); + + s->min_Lsw_oto = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_OTO__; + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_oto = math_max2(s->min_Lsw_oto, 2.0); + + // use vactive swath bw for prefetch oto and also cap prefetch_bw_oto to max_vratio_oto + // Note: in prefetch calculation, acounting is done mostly per-pipe. + // vactive swath bw represents the per-surface (aka per dml plane) bw to move vratio_l/c lines of bytes_l/c per line time + s->per_pipe_vactive_sw_bw = p->vactive_sw_bw_l / (double)p->myPipe->DPPPerSurface; + + // one-to-one prefetch bw as one line of bytes per line time (as per vratio_pre_l/c = 1) + s->prefetch_bw_oto = (p->swath_width_luma_ub * p->myPipe->BytePerPixelY) / s->LineTime; + + if (p->myPipe->BytePerPixelC > 0) { + s->per_pipe_vactive_sw_bw += p->vactive_sw_bw_c / (double)p->myPipe->DPPPerSurface; + s->prefetch_bw_oto += (p->swath_width_chroma_ub * p->myPipe->BytePerPixelC) / s->LineTime; + } + + s->prefetch_bw_oto = math_max2(s->per_pipe_vactive_sw_bw, s->prefetch_bw_oto) * p->mall_prefetch_sdp_overhead_factor; + + s->prefetch_bw_oto = math_min2(s->prefetch_bw_oto, *p->prefetch_sw_bytes/(s->min_Lsw_oto*s->LineTime)); + + s->Lsw_oto = math_ceil2(4.0 * *p->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, 1.0) / 4.0; + s->prefetch_bw_oto = math_max3(s->prefetch_bw_oto, p->vm_bytes * p->HostVMInefficiencyFactor / (31 * s->LineTime) - *p->Tno_bw, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / (15 * s->LineTime)); - s->Lsw_oto = math_ceil2(4.0 * math_max2(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0; + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: vactive_sw_bw_l = %f\n", __func__, p->vactive_sw_bw_l); + dml2_printf("DML::%s: vactive_sw_bw_c = %f\n", __func__, p->vactive_sw_bw_c); + dml2_printf("DML::%s: per_pipe_vactive_sw_bw = %f\n", __func__, s->per_pipe_vactive_sw_bw); +#endif if (p->display_cfg->gpuvm_enable == true) { - s->Tvm_no_trip_oto = math_max2( + s->Tvm_oto = math_max3( + *p->Tvm_trips, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto, s->LineTime / 4.0); - s->Tvm_oto = math_max2( - *p->Tvm_trips, - s->Tvm_no_trip_oto); + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tvm_oto max0 = %f\n", __func__, *p->Tvm_trips); dml2_printf("DML::%s: Tvm_oto max1 = %f\n", __func__, *p->Tno_bw + vm_bytes * p->HostVMInefficiencyFactor / s->prefetch_bw_oto); dml2_printf("DML::%s: Tvm_oto max2 = %f\n", __func__, s->LineTime / 4.0); #endif } else { - s->Tvm_no_trip_oto = s->Tvm_trips_rounded; s->Tvm_oto = s->Tvm_trips_rounded; } if ((p->display_cfg->gpuvm_enable == true || p->setup_for_tdlut || dcc_mrq_enable)) { - s->Tr0_no_trip_oto = math_max2( + s->Tr0_oto = math_max3( + *p->Tr0_trips, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto, s->LineTime / 4.0); - s->Tr0_oto = math_max2( - *p->Tr0_trips, - s->Tr0_no_trip_oto); #ifdef __DML_VBA_DEBUG__ 
dml2_printf("DML::%s: Tr0_oto max0 = %f\n", __func__, *p->Tr0_trips); dml2_printf("DML::%s: Tr0_oto max1 = %f\n", __func__, (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) / s->prefetch_bw_oto); dml2_printf("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime / 4); #endif - } else { - s->Tr0_no_trip_oto = (s->LineTime - s->Tvm_oto) / 4.0; - s->Tr0_oto = s->Tr0_no_trip_oto; - } + } else + s->Tr0_oto = s->LineTime / 4.0; s->Tvm_oto_lines = math_ceil2(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0; s->Tr0_oto_lines = math_ceil2(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0; s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto; +#ifdef DML_GLOBAL_PREFETCH_CHECK + dml2_printf("DML::%s: impacted_Tpre = %f\n", __func__, p->impacted_dst_y_pre); + if (p->impacted_dst_y_pre > 0) { + dml2_printf("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto); + s->dst_y_prefetch_oto = math_max2(s->dst_y_prefetch_oto, p->impacted_dst_y_pre); + dml2_printf("DML::%s: dst_y_prefetch_oto = %f (impacted)\n", __func__, s->dst_y_prefetch_oto); + } +#endif + *p->Tpre_oto = s->dst_y_prefetch_oto * s->LineTime; + //To (time for delay after scaler) in line time Lo = (unsigned int)(*p->DSTYAfterScaler + (double)*p->DSTXAfterScaler / (double)p->myPipe->HTotal); + s->min_Lsw_equ = math_max2(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML2_CALCS_MAX_VRATIO_PRE_EQU__; + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, p->tdlut_drain_time / s->LineTime); + s->min_Lsw_equ = math_max2(s->min_Lsw_equ, 2.0); //Tpre_equ in line time if (p->DynamicMetadataVMEnabled && p->DynamicMetadataEnable) s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, *p->Tvm_trips) + s->TWait_p) / s->LineTime - Lo; else s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + math_max2(p->TCalc, p->ExtraLatencyPrefetch) + s->TWait_p) / s->LineTime - Lo; + +#ifdef DML_GLOBAL_PREFETCH_CHECK + s->dst_y_prefetch_equ_impacted = math_max2(p->impacted_dst_y_pre, s->dst_y_prefetch_equ); + + s->dst_y_prefetch_equ_impacted = math_min2(s->dst_y_prefetch_equ_impacted, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH + + if (s->dst_y_prefetch_equ_impacted > s->dst_y_prefetch_equ) + s->dst_y_prefetch_equ -= s->dst_y_prefetch_equ_impacted - s->dst_y_prefetch_equ; +#endif + s->dst_y_prefetch_equ = math_min2(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH #ifdef __DML_VBA_DEBUG__ @@ -5370,7 +5412,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC); dml2_printf("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC); dml2_printf("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub); - dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes); + dml2_printf("DML::%s: prefetch_sw_bytes = %f\n", __func__, *p->prefetch_sw_bytes); dml2_printf("DML::%s: max_Tsw = %f\n", __func__, s->max_Tsw); dml2_printf("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp); dml2_printf("DML::%s: vm_bytes = %u\n", __func__, vm_bytes); @@ -5394,7 +5436,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch #endif double Tpre = s->dst_y_prefetch_equ * s->LineTime; s->dst_y_prefetch_equ = math_floor2(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0; - s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime; + *p->Tpre_rounded = s->dst_y_prefetch_equ * 
s->LineTime; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ); @@ -5420,7 +5462,7 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: vm_bytes: %f (hvm inefficiency scaled)\n", __func__, vm_bytes*p->HostVMInefficiencyFactor); dml2_printf("DML::%s: row_bytes: %f (hvm inefficiency scaled, 1 row)\n", __func__, p->PixelPTEBytesPerRow*p->HostVMInefficiencyFactor+p->meta_row_bytes+tdlut_row_bytes); dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); #endif @@ -5434,78 +5476,85 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch // Tpre_rounded is Tpre rounding to 2-bit fraction // Tvm_trips_rounded is Tvm_trips ceiling to 1/4 line time // Tr0_trips_rounded is Tr0_trips ceiling to 1/4 line time - // So that means prefetch bw calculated can be higher since the total time availabe for prefetch is less - bool min_Lsw_equ_ok = s->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + // So that means prefetch bw calculated can be higher since the total time available for prefetch is less + bool min_Lsw_equ_ok = *p->Tpre_rounded >= s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded + s->min_Lsw_equ*s->LineTime; + bool tpre_gt_req_latency = true; +#if 0 + // Check that Tpre_rounded is big enough if all of the stages of the prefetch are time constrained. + // The terms Tvm_trips_rounded and Tr0_trips_rounded represent the min time constraints for the VM and row stages. + // Normally, these terms cover the overall time constraint for Tpre >= (Tex + max{Ttrip, Turg}), but if these terms are at their minimum, an explicit check is necessary. 
+ tpre_gt_req_latency = *p->Tpre_rounded > (math_max2(p->Turg, s->trip_to_mem) + p->ExtraLatencyPrefetch); +#endif - if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok) { + if (s->dst_y_prefetch_equ > 1 && min_Lsw_equ_ok && tpre_gt_req_latency) { s->prefetch_bw1 = 0.; s->prefetch_bw2 = 0.; s->prefetch_bw3 = 0.; s->prefetch_bw4 = 0.; // prefetch_bw1: VM + 2*R0 + SW - if (s->Tpre_rounded - *p->Tno_bw > 0) { + if (*p->Tpre_rounded - *p->Tno_bw > 0) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) - + s->prefetch_sw_bytes) - / (s->Tpre_rounded - *p->Tno_bw); - s->Tsw_est1 = s->prefetch_sw_bytes / s->prefetch_bw1; + + *p->prefetch_sw_bytes) + / (*p->Tpre_rounded - *p->Tno_bw); + s->Tsw_est1 = *p->prefetch_sw_bytes / s->prefetch_bw1; } else s->prefetch_bw1 = 0; dml2_printf("DML::%s: prefetch_bw1: %f\n", __func__, s->prefetch_bw1); - if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { + if ((s->Tsw_est1 < s->min_Lsw_equ * s->LineTime) && (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0)) { s->prefetch_bw1 = (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / - (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); + (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: vm and 2 rows bytes = %f\n", __func__, (vm_bytes * p->HostVMInefficiencyFactor + 2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes))); - dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, s->Tpre_rounded); + dml2_printf("DML::%s: Tpre_rounded = %f\n", __func__, *p->Tpre_rounded); dml2_printf("DML::%s: minus term = %f\n", __func__, s->min_Lsw_equ * s->LineTime + 0.75 * s->LineTime + *p->Tno_bw); dml2_printf("DML::%s: min_Lsw_equ = %f\n", __func__, s->min_Lsw_equ); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); dml2_printf("DML::%s: Tno_bw = %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); + dml2_printf("DML::%s: Time to fetch vm and 2 rows = %f\n", __func__, (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw)); dml2_printf("DML::%s: prefetch_bw1: %f (updated)\n", __func__, s->prefetch_bw1); #endif } // prefetch_bw2: VM + SW - if (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { - s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) / - (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); - s->Tsw_est2 = s->prefetch_sw_bytes / s->prefetch_bw2; + if (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded > 0) { + s->prefetch_bw2 = (vm_bytes * p->HostVMInefficiencyFactor + *p->prefetch_sw_bytes) / + (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded); + s->Tsw_est2 = *p->prefetch_sw_bytes / s->prefetch_bw2; } else s->prefetch_bw2 = 0; dml2_printf("DML::%s: prefetch_bw2: %f\n", __func__, s->prefetch_bw2); - if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { - s->prefetch_bw2 = vm_bytes * 
p->HostVMInefficiencyFactor / (s->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); + if ((s->Tsw_est2 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime) > 0)) { + s->prefetch_bw2 = vm_bytes * p->HostVMInefficiencyFactor / (*p->Tpre_rounded - *p->Tno_bw - 2.0 * s->Tr0_trips_rounded - s->min_Lsw_equ * s->LineTime - 0.25 * s->LineTime); dml2_printf("DML::%s: prefetch_bw2: %f (updated)\n", __func__, s->prefetch_bw2); } // prefetch_bw3: 2*R0 + SW - if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + s->prefetch_sw_bytes) / - (s->Tpre_rounded - s->Tvm_trips_rounded); - s->Tsw_est3 = s->prefetch_sw_bytes / s->prefetch_bw3; + if (*p->Tpre_rounded - s->Tvm_trips_rounded > 0) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes) + *p->prefetch_sw_bytes) / + (*p->Tpre_rounded - s->Tvm_trips_rounded); + s->Tsw_est3 = *p->prefetch_sw_bytes / s->prefetch_bw3; } else s->prefetch_bw3 = 0; dml2_printf("DML::%s: prefetch_bw3: %f\n", __func__, s->prefetch_bw3); - if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { - s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (s->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); + if ((s->Tsw_est3 < s->min_Lsw_equ * s->LineTime) && ((*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded) > 0)) { + s->prefetch_bw3 = (2 * (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + p->meta_row_bytes + tdlut_row_bytes)) / (*p->Tpre_rounded - s->min_Lsw_equ * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded); dml2_printf("DML::%s: prefetch_bw3: %f (updated)\n", __func__, s->prefetch_bw3); } // prefetch_bw4: SW - if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) - s->prefetch_bw4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); + if (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0) + s->prefetch_bw4 = *p->prefetch_sw_bytes / (*p->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded); else s->prefetch_bw4 = 0; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw); - dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, s->Tpre_rounded, (s->Tpre_rounded - Tpre)); + dml2_printf("DML::%s: Tpre=%f Tpre_rounded: %f, delta=%f\n", __func__, Tpre, *p->Tpre_rounded, (*p->Tpre_rounded - Tpre)); dml2_printf("DML::%s: Tvm_trips=%f Tvm_trips_rounded: %f, delta=%f\n", __func__, *p->Tvm_trips, s->Tvm_trips_rounded, (s->Tvm_trips_rounded - *p->Tvm_trips)); dml2_printf("DML::%s: Tr0_trips=%f Tr0_trips_rounded: %f, delta=%f\n", __func__, *p->Tr0_trips, s->Tr0_trips_rounded, (s->Tr0_trips_rounded - *p->Tr0_trips)); dml2_printf("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1); @@ -5617,9 +5666,6 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch dml2_printf("DML::%s: Tvm_equ = %f\n", __func__, s->Tvm_equ); dml2_printf("DML::%s: Tr0_equ = %f\n", __func__, s->Tr0_equ); #endif - // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 
2*dst_y_per_row_vblank) - s->Lsw_equ = s->dst_y_prefetch_equ - math_ceil2(4.0 * (s->Tvm_equ + 2 * s->Tr0_equ) / s->LineTime, 1.0) / 4.0; - // Use the more stressful prefetch schedule if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) { *p->dst_y_prefetch = s->dst_y_prefetch_oto; @@ -5628,28 +5674,29 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - s->dst_y_per_vm_no_trip_vblank = math_ceil2(4.0 * s->Tvm_no_trip_oto / s->LineTime, 1.0) / 4.0; - s->dst_y_per_row_no_trip_vblank = math_ceil2(4.0 * s->Tr0_no_trip_oto / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using oto scheduling for prefetch\n", __func__); #endif + } else { *p->dst_y_prefetch = s->dst_y_prefetch_equ; + + if (s->dst_y_prefetch_equ < s->dst_y_prefetch_equ_impacted) + *p->dst_y_prefetch = s->dst_y_prefetch_equ_impacted; + s->TimeForFetchingVM = s->Tvm_equ; s->TimeForFetchingRowInVBlank = s->Tr0_equ; - *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; - *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; - s->dst_y_per_vm_no_trip_vblank = *p->dst_y_per_vm_vblank; - s->dst_y_per_row_no_trip_vblank = *p->dst_y_per_row_vblank; + *p->dst_y_per_vm_vblank = math_ceil2(4.0 * s->TimeForFetchingVM / s->LineTime, 1.0) / 4.0; + *p->dst_y_per_row_vblank = math_ceil2(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: Using equ bw scheduling for prefetch\n", __func__); #endif } - /* take worst case Lsw to calculate bandwidth requirement regardless of schedule */ - s->LinesToRequestPrefetchPixelData = math_min2(s->Lsw_equ, s->Lsw_oto); // Lsw + // Lsw = dst_y_prefetch - (dst_y_per_vm_vblank + 2*dst_y_per_row_vblank) + s->LinesToRequestPrefetchPixelData = *p->dst_y_prefetch - *p->dst_y_per_vm_vblank - 2 * *p->dst_y_per_row_vblank; // Lsw s->cursor_prefetch_bytes = (unsigned int)math_max2(p->cursor_bytes_per_chunk, 4 * p->cursor_bytes_per_line); *p->prefetch_cursor_bw = p->num_cursors * s->cursor_prefetch_bytes / (s->LinesToRequestPrefetchPixelData * s->LineTime); @@ -5749,8 +5796,10 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch } else { dml2_printf("DML::%s: No time to prefetch! dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ); - dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", - __func__, min_Lsw_equ_ok, s->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); + dml2_printf("DML::%s: No time to prefetch! min_Lsw_equ_ok = %d, Tpre_rounded (%f) should be >= Tvm_trips_rounded (%f) + 2.0*Tr0_trips_rounded (%f) + min_Tsw_equ (%f)\n", + __func__, min_Lsw_equ_ok, *p->Tpre_rounded, s->Tvm_trips_rounded, 2.0*s->Tr0_trips_rounded, s->min_Lsw_equ*s->LineTime); + dml2_printf("DML::%s: No time to prefetch! 
min_Lsw_equ_ok = %d, Tpre_rounded+Tvm_trips_rounded+2.0*Tr0_trips_rounded+min_Tsw_equ (%f) should be > \n", + __func__, tpre_gt_req_latency, (s->min_Lsw_equ*s->LineTime + s->Tvm_trips_rounded + 2.0*s->Tr0_trips_rounded), p->Turg, s->trip_to_mem, p->ExtraLatencyPrefetch); s->NoTimeToPrefetch = true; s->TimeForFetchingVM = 0; s->TimeForFetchingRowInVBlank = 0; @@ -5769,13 +5818,13 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (vm_bytes == 0) { prefetch_vm_bw = 0; - } else if (s->dst_y_per_vm_no_trip_vblank > 0) { + } else if (*p->dst_y_per_vm_vblank > 0) { #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor); dml2_printf("DML::%s: dst_y_per_vm_vblank = %f\n", __func__, *p->dst_y_per_vm_vblank); dml2_printf("DML::%s: LineTime = %f\n", __func__, s->LineTime); #endif - prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (s->dst_y_per_vm_no_trip_vblank * s->LineTime); + prefetch_vm_bw = vm_bytes * p->HostVMInefficiencyFactor / (*p->dst_y_per_vm_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); #endif @@ -5787,8 +5836,8 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch if (p->PixelPTEBytesPerRow == 0 && tdlut_row_bytes == 0) { prefetch_row_bw = 0; - } else if (s->dst_y_per_row_no_trip_vblank > 0) { - prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (s->dst_y_per_row_no_trip_vblank * s->LineTime); + } else if (*p->dst_y_per_row_vblank > 0) { + prefetch_row_bw = (p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + tdlut_row_bytes) / (*p->dst_y_per_row_vblank * s->LineTime); #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow); @@ -5828,6 +5877,171 @@ static bool CalculatePrefetchSchedule(struct dml2_core_internal_scratch *scratch return s->NoTimeToPrefetch; } +static unsigned int get_num_lb_source_lines(unsigned int max_line_buffer_lines, + unsigned int line_buffer_size_bits, + unsigned int num_pipes, + unsigned int vp_width, + unsigned int vp_height, + double h_ratio, + enum dml2_rotation_angle rotation_angle) +{ + unsigned int num_lb_source_lines = 0; + double lb_bit_per_pixel = 57.0; + unsigned recin_width = vp_width/num_pipes; + + if (dml_is_vertical_rotation(rotation_angle)) + recin_width = vp_height/num_pipes; + + num_lb_source_lines = (unsigned int) math_min2((double) max_line_buffer_lines, + math_floor2(line_buffer_size_bits / lb_bit_per_pixel / (recin_width / math_max2(h_ratio, 1.0)), 1.0)); + + return num_lb_source_lines; +} + +static unsigned int find_max_impact_plane(unsigned int this_plane_idx, unsigned int num_planes, unsigned int Trpd_dcfclk_cycles[]) +{ + int max_value = -1; + int max_idx = -1; + for (unsigned int i = 0; i < num_planes; i++) { + if (i != this_plane_idx && (int) Trpd_dcfclk_cycles[i] > max_value) { + max_value = Trpd_dcfclk_cycles[i]; + max_idx = i; + } + } + if (max_idx <= 0) { + dml2_assert(max_idx >= 0); + max_idx = this_plane_idx; + } + + return max_idx; +} + +static double calculate_impacted_Tsw(unsigned int exclude_plane_idx, unsigned int num_planes, double *prefetch_swath_bytes, double bw_mbps) +{ + double sum = 0.; + for (unsigned int i = 0; i < num_planes; i++) { + if (i != exclude_plane_idx) { + sum += prefetch_swath_bytes[i]; + } + } + return sum / bw_mbps; +} + +// a global check against the aggregate effect of the per plane prefetch 
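/*
 * The two helpers added above feed the global prefetch check: for each plane i
 * the plane k (k != i) with the largest return-path delay is located, and the
 * extra swath time seen by i is the sum of every other plane's (except k's)
 * prefetch swath bytes divided by the estimated bandwidth.  A toy standalone
 * rendering of the same pattern, not the DML helpers themselves:
 */
#include <stdio.h>

int main(void)
{
	unsigned int trpd_cycles[3] = { 400, 900, 650 };
	double swath_bytes[3] = { 1.0e6, 2.0e6, 1.5e6 };
	double bw_mbps = 8000.0;
	unsigned int i = 0, j, k = 0;
	int max_val = -1;
	double impacted_tsw = 0.0;

	for (j = 0; j < 3; j++)            /* worst other plane for plane i */
		if (j != i && (int)trpd_cycles[j] > max_val) {
			max_val = (int)trpd_cycles[j];
			k = j;
		}
	for (j = 0; j < 3; j++)            /* everyone except k still competes for bandwidth */
		if (j != k)
			impacted_tsw += swath_bytes[j];
	impacted_tsw /= bw_mbps;

	printf("plane %u: worst impactor is plane %u, impacted Tsw = %f\n", i, k, impacted_tsw);
	return 0;
}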
schedule +static bool CheckGlobalPrefetchAdmissibility(struct dml2_core_internal_scratch *scratch, + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *p) +{ + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals *s = &scratch->CheckGlobalPrefetchAdmissibility_locals; + unsigned int i, k; + + memset(s, 0, sizeof(struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals)); + + *p->recalc_prefetch_schedule = 0; + s->prefetch_global_check_passed = 1; + // worst case if the rob and cdb is fully hogged + s->max_Trpd_dcfclk_cycles = (unsigned int) math_ceil2((p->rob_buffer_size_kbytes*1024 + p->compressed_buffer_size_kbytes*DML_MAX_COMPRESSION_RATIO*1024)/64.0, 1.0); +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: num_active_planes = %d\n", __func__, p->num_active_planes); + dml2_printf("DML::%s: rob_buffer_size_kbytes = %d\n", __func__, p->rob_buffer_size_kbytes); + dml2_printf("DML::%s: compressed_buffer_size_kbytes = %d\n", __func__, p->compressed_buffer_size_kbytes); + dml2_printf("DML::%s: estimated_urg_bandwidth_required_mbps = %f\n", __func__, p->estimated_urg_bandwidth_required_mbps); + dml2_printf("DML::%s: estimated_dcfclk_mhz = %f\n", __func__, p->estimated_dcfclk_mhz); + dml2_printf("DML::%s: max_Trpd_dcfclk_cycles = %u\n", __func__, s->max_Trpd_dcfclk_cycles); +#endif + + // calculate the return impact from each plane, request is 256B per dcfclk + for (i = 0; i < p->num_active_planes; i++) { + s->src_detile_buf_size_bytes_l[i] = p->detile_buffer_size_bytes_l[i]; + s->src_detile_buf_size_bytes_c[i] = p->detile_buffer_size_bytes_c[i]; + s->src_swath_bytes_l[i] = p->full_swath_bytes_l[i]; + s->src_swath_bytes_c[i] = p->full_swath_bytes_c[i]; + + if (p->pixel_format[i] == dml2_420_10) { + s->src_detile_buf_size_bytes_l[i] = (unsigned int) (s->src_detile_buf_size_bytes_l[i] * 1.5); + s->src_detile_buf_size_bytes_c[i] = (unsigned int) (s->src_detile_buf_size_bytes_c[i] * 1.5); + s->src_swath_bytes_l[i] = (unsigned int) (s->src_swath_bytes_l[i] * 1.5); + s->src_swath_bytes_c[i] = (unsigned int) (s->src_swath_bytes_c[i] * 1.5); + } + + s->burst_bytes_to_fill_det = (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_l[i] / p->chunk_bytes_l, 1) * p->chunk_bytes_l); + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_l[i] / p->swath_height_l[i], 1) * s->src_swath_bytes_l[i]); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u pixel_format = %d\n", __func__, i, p->pixel_format[i]); + dml2_printf("DML::%s: i=%u chunk_bytes_l = %d\n", __func__, i, p->chunk_bytes_l); + dml2_printf("DML::%s: i=%u lb_source_lines_l = %d\n", __func__, i, p->lb_source_lines_l[i]); + dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_l=%d\n", __func__, i, s->src_detile_buf_size_bytes_l[i]); + dml2_printf("DML::%s: i=%u src_swath_bytes_l=%d\n", __func__, i, s->src_swath_bytes_l[i]); + dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d (luma)\n", __func__, i, s->burst_bytes_to_fill_det); +#endif + + if (s->src_swath_bytes_c[i] > 0) { // dual_plane + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(s->src_detile_buf_size_bytes_c[i] / p->chunk_bytes_c, 1) * p->chunk_bytes_c); + + if (p->pixel_format[i] == dml2_422_planar_8 || p->pixel_format[i] == dml2_422_planar_10 || p->pixel_format[i] == dml2_422_planar_12) { + s->burst_bytes_to_fill_det += (unsigned int) (math_floor2(p->lb_source_lines_c[i] / p->swath_height_c[i], 1) * s->src_swath_bytes_c[i]); + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u chunk_bytes_c = %d\n", __func__, 
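/*
 * Inside CheckGlobalPrefetchAdmissibility the burst needed to fill a plane's
 * DET is estimated as the chunk-aligned detile buffer size plus the whole
 * swaths implied by the line-buffer source lines; 420 10-bit surfaces scale
 * both by 1.5.  A reduced sketch of that estimate with made-up sizes:
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
	unsigned int det_bytes = 98304, chunk_bytes = 8192;
	unsigned int lb_source_lines = 16, swath_height = 4, swath_bytes = 30720;
	int is_420_10 = 1;

	if (is_420_10) {               /* 10-bit 4:2:0 widens both terms by 1.5x */
		det_bytes = (unsigned int)(det_bytes * 1.5);
		swath_bytes = (unsigned int)(swath_bytes * 1.5);
	}

	unsigned int burst = (unsigned int)(floor((double)det_bytes / chunk_bytes) * chunk_bytes);
	burst += (unsigned int)(floor((double)lb_source_lines / swath_height) * swath_bytes);

	printf("burst_bytes_to_fill_det = %u\n", burst);
	return 0;
}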
i, p->chunk_bytes_c); + dml2_printf("DML::%s: i=%u lb_source_lines_c = %d\n", __func__, i, p->lb_source_lines_c[i]); + dml2_printf("DML::%s: i=%u src_detile_buf_size_bytes_c=%d\n", __func__, i, s->src_detile_buf_size_bytes_c[i]); + dml2_printf("DML::%s: i=%u src_swath_bytes_c=%d\n", __func__, i, s->src_swath_bytes_c[i]); +#endif + } + + s->time_to_fill_det_us = (double) s->burst_bytes_to_fill_det / (256 * p->estimated_dcfclk_mhz); // fill time assume full burst at request rate + s->accumulated_return_path_dcfclk_cycles[i] = (unsigned int) math_ceil2(((DML_MAX_COMPRESSION_RATIO-1) * 64 * p->estimated_dcfclk_mhz) * s->time_to_fill_det_us / 64.0, 1.0); //for 64B per DCFClk + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u burst_bytes_to_fill_det=%d\n", __func__, i, s->burst_bytes_to_fill_det); + dml2_printf("DML::%s: i=%u time_to_fill_det_us=%f\n", __func__, i, s->time_to_fill_det_us); + dml2_printf("DML::%s: i=%u accumulated_return_path_dcfclk_cycles=%u\n", __func__, i, s->accumulated_return_path_dcfclk_cycles[i]); +#endif + // clamping to worst case delay which is one which occupy the full rob+cdb + if (s->accumulated_return_path_dcfclk_cycles[i] > s->max_Trpd_dcfclk_cycles) + s->accumulated_return_path_dcfclk_cycles[i] = s->max_Trpd_dcfclk_cycles; + } + + // Figure out the impacted prefetch time for each plane + // if impacted_Tre is > equ bw Tpre, we need to fail the prefetch schedule as we need a higher state to support the bw + for (i = 0; i < p->num_active_planes; i++) { + k = find_max_impact_plane(i, p->num_active_planes, s->accumulated_return_path_dcfclk_cycles); // plane k causes most impact to plane i + // the rest of planes (except for k) complete for bw + p->impacted_dst_y_pre[i] = s->accumulated_return_path_dcfclk_cycles[k]/p->estimated_dcfclk_mhz; + p->impacted_dst_y_pre[i] += calculate_impacted_Tsw(k, p->num_active_planes, p->prefetch_sw_bytes, p->estimated_urg_bandwidth_required_mbps); + p->impacted_dst_y_pre[i] = math_ceil2(p->impacted_dst_y_pre[i] / p->line_time[i], 0.25); + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u impacted_Tpre=%f (k=%u)\n", __func__, i, p->impacted_dst_y_pre[i], k); +#endif + } + + if (p->Tpre_rounded != NULL && p->Tpre_oto != NULL) { + for (i = 0; i < p->num_active_planes; i++) { + if (p->impacted_dst_y_pre[i] > p->dst_y_prefetch[i]) { + s->prefetch_global_check_passed = 0; + *p->recalc_prefetch_schedule = 1; + } +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: i=%u Tpre_rounded=%f\n", __func__, i, p->Tpre_rounded[i]); + dml2_printf("DML::%s: i=%u Tpre_oto=%f\n", __func__, i, p->Tpre_oto[i]); +#endif + } + } else { + // likely a mode programming calls, assume support, and no recalc - not used anyways + s->prefetch_global_check_passed = 1; + *p->recalc_prefetch_schedule = 0; + } + +#ifdef __DML_VBA_DEBUG__ + dml2_printf("DML::%s: prefetch_global_check_passed=%u\n", __func__, s->prefetch_global_check_passed); + dml2_printf("DML::%s: recalc_prefetch_schedule=%u\n", __func__, *p->recalc_prefetch_schedule); +#endif + + return s->prefetch_global_check_passed; +} + static void calculate_peak_bandwidth_required( struct dml2_core_internal_scratch *s, struct dml2_core_calcs_calculate_peak_bandwidth_required_params *p) @@ -6046,7 +6260,7 @@ static void check_urgent_bandwidth_support( double *frac_urg_bandwidth_nom, double *frac_urg_bandwidth_mall, bool *vactive_bandwidth_support_ok, // vactive ok - bool *bandwidth_support_ok, // max of vm, prefetch, vactive all ok + bool *bandwidth_support_ok,// max of vm, prefetch, vactive all ok unsigned int 
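/*
 * Each plane's DET fill time assumes a full 256-byte request per DCFCLK cycle;
 * the resulting return-path cycle count is then clamped to the worst case in
 * which the ROB plus the compressed buffer are completely occupied.  Sketch of
 * those two steps, with DML_MAX_COMPRESSION_RATIO assumed to be 4 for the
 * example:
 */
#include <math.h>
#include <stdio.h>

#define MAX_COMPRESSION_RATIO 4.0   /* assumption for this sketch */

int main(void)
{
	double dcfclk_mhz = 800.0;
	unsigned int burst_bytes = 331776;
	unsigned int rob_kbytes = 128, cdb_kbytes = 64;

	double fill_us = (double)burst_bytes / (256.0 * dcfclk_mhz);
	unsigned int trpd_cycles = (unsigned int)ceil(
		((MAX_COMPRESSION_RATIO - 1.0) * 64.0 * dcfclk_mhz) * fill_us / 64.0);
	unsigned int max_trpd = (unsigned int)ceil(
		(rob_kbytes * 1024.0 + cdb_kbytes * MAX_COMPRESSION_RATIO * 1024.0) / 64.0);

	if (trpd_cycles > max_trpd)    /* cannot be worse than a fully hogged ROB+CDB */
		trpd_cycles = max_trpd;

	printf("fill = %f us, trpd = %u cycles (cap %u)\n", fill_us, trpd_cycles, max_trpd);
	return 0;
}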
mall_allocated_for_dcn_mbytes, double non_urg_bandwidth_required[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max], @@ -6116,7 +6330,6 @@ static void check_urgent_bandwidth_support( } } #endif - } static double get_bandwidth_available_for_immediate_flip(enum dml2_core_internal_soc_state_type eval_state, @@ -6438,7 +6651,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( p->Watermark->Z8StutterExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; p->Watermark->Z8StutterEnterPlusExitWatermark += p->mmSOCParameters.max_urgent_latency_us + p->mmSOCParameters.df_response_time_us; } - p->Watermark->g6_temp_read_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; + p->Watermark->temp_read_or_ppt_watermark_us = p->mmSOCParameters.g6_temp_read_blackout_us + p->Watermark->UrgentWatermark; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: UrgentLatency = %f\n", __func__, p->mmSOCParameters.UrgentLatency); @@ -6454,12 +6667,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( dml2_printf("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->StutterEnterPlusExitWatermark); dml2_printf("DML::%s: Z8StutterExitWatermark = %f\n", __func__, p->Watermark->Z8StutterExitWatermark); dml2_printf("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, p->Watermark->Z8StutterEnterPlusExitWatermark); - dml2_printf("DML::%s: g6_temp_read_watermark_us = %f\n", __func__, p->Watermark->g6_temp_read_watermark_us); + dml2_printf("DML::%s: temp_read_or_ppt_watermark_us = %f\n", __func__, p->Watermark->temp_read_or_ppt_watermark_us); #endif s->TotalActiveWriteback = 0; for (unsigned int k = 0; k < p->NumberOfActiveSurfaces; ++k) { - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { s->TotalActiveWriteback = s->TotalActiveWriteback + 1; } } @@ -6522,7 +6735,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->LBLatencyHidingSourceLinesC[k] = (unsigned int)(math_min2((double)p->MaxLineBufferLines, math_floor2((double)p->LineBufferSize / LBBitPerPixel / ((double)p->SwathWidthC[k] / math_max2(h_ratio_c, 1.0)), 1)) - (v_taps_c - 1)); #ifdef __DML_VBA_DEBUG__ - dml2_printf("DML::%s: k=%u, MaxLineBufferLines= %u\n", __func__, k, p->MaxLineBufferLines); + dml2_printf("DML::%s: k=%u, MaxLineBufferLines = %u\n", __func__, k, p->MaxLineBufferLines); dml2_printf("DML::%s: k=%u, LineBufferSize = %u\n", __func__, k, p->LineBufferSize); dml2_printf("DML::%s: k=%u, LBBitPerPixel = %u\n", __func__, k, LBBitPerPixel); dml2_printf("DML::%s: k=%u, HRatio = %f\n", __func__, k, h_ratio); @@ -6563,7 +6776,7 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( s->ActiveDRAMClockChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->DRAMClockChangeWatermark; s->ActiveFCLKChangeLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->FCLKChangeWatermark; s->USRRetrainingLatencyMargin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->USRRetrainingWatermark; - s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->g6_temp_read_watermark_us; + s->g6_temp_read_latency_margin[k] = s->ActiveClockChangeLatencyHiding - p->Watermark->temp_read_or_ppt_watermark_us; if (p->VActiveLatencyHidingMargin) 
p->VActiveLatencyHidingMargin[k] = s->ActiveDRAMClockChangeLatencyMargin[k]; @@ -6571,9 +6784,12 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( if (p->VActiveLatencyHidingUs) p->VActiveLatencyHidingUs[k] = s->ActiveClockChangeLatencyHiding; - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.enable) { - s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height * (double)h_total / pixel_clock_mhz) * 4.0); - if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) { + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + s->WritebackLatencyHiding = (double)p->WritebackInterfaceBufferSize * 1024.0 + / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * (double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / ((double)p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * (double)h_total / pixel_clock_mhz) * 4.0); + if (p->display_cfg->stream_descriptors[p->display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { s->WritebackLatencyHiding = s->WritebackLatencyHiding / 2; } s->WritebackDRAMClockChangeLatencyMargin = s->WritebackLatencyHiding - p->Watermark->WritebackDRAMClockChangeWatermark; @@ -6588,36 +6804,36 @@ static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport( uclk_pstate_change_strategy = p->display_cfg->plane_descriptors[k].overrides.uclk_pstate_change_strategy; reserved_vblank_time_us = (double)p->display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns / 1000; - p->FCLKChangeSupport[k] = dml2_fclock_change_unsupported; + p->FCLKChangeSupport[k] = dml2_pstate_change_unsupported; if (s->ActiveFCLKChangeLatencyMargin[k] > 0) - p->FCLKChangeSupport[k] = dml2_fclock_change_vactive; + p->FCLKChangeSupport[k] = dml2_pstate_change_vactive; else if (reserved_vblank_time_us >= p->mmSOCParameters.FCLKChangeLatency) - p->FCLKChangeSupport[k] = dml2_fclock_change_vblank; + p->FCLKChangeSupport[k] = dml2_pstate_change_vblank; - if (p->FCLKChangeSupport[k] == dml2_fclock_change_unsupported) + if (p->FCLKChangeSupport[k] == dml2_pstate_change_unsupported) *p->global_fclk_change_supported = false; - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_unsupported; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_unsupported; if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_auto) { if (p->display_cfg->overrides.all_streams_blanked || (s->ActiveDRAMClockChangeLatencyMargin[k] > 0 && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency)) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank_and_vactive; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank_and_vactive; else if 
(s->ActiveDRAMClockChangeLatencyMargin[k] > 0) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; else if (reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; } else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vactive && s->ActiveDRAMClockChangeLatencyMargin[k] > 0) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vactive; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vactive; else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_vblank && reserved_vblank_time_us >= p->mmSOCParameters.DRAMClockChangeLatency) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_vblank; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_vblank; else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_drr) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_drr; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_drr; else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_svp) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_svp; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_svp; else if (uclk_pstate_change_strategy == dml2_uclk_pstate_change_strategy_force_mall_full_frame) - p->DRAMClockChangeSupport[k] = dml2_dram_clock_change_mall_full_frame; + p->DRAMClockChangeSupport[k] = dml2_pstate_change_mall_full_frame; - if (p->DRAMClockChangeSupport[k] == dml2_dram_clock_change_unsupported) + if (p->DRAMClockChangeSupport[k] == dml2_pstate_change_unsupported) *p->global_dram_clock_change_supported = false; s->dst_y_pstate = (unsigned int)(math_ceil2((p->mmSOCParameters.DRAMClockChangeLatency + p->mmSOCParameters.UrgentLatency) / (h_total / pixel_clock_mhz), 1)); @@ -6915,8 +7131,7 @@ struct dml2_core_internal_g6_temp_read_blackouts_table { } entries[DML_MAX_CLK_TABLE_SIZE]; }; -static const struct dml2_core_internal_g6_temp_read_blackouts_table - core_dcn4_g6_temp_read_blackout_table = { +struct dml2_core_internal_g6_temp_read_blackouts_table core_dcn4_g6_temp_read_blackout_table = { .entries = { { .uclk_khz = 96000, @@ -7036,6 +7251,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out struct dml2_core_calcs_CalculateVMRowAndSwath_params *CalculateVMRowAndSwath_params = &mode_lib->scratch.CalculateVMRowAndSwath_params; struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; +#ifdef DML_GLOBAL_PREFETCH_CHECK + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; +#endif struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; struct dml2_core_calcs_calculate_peak_bandwidth_required_params *calculate_peak_bandwidth_params = &mode_lib->scratch.calculate_peak_bandwidth_params; @@ -7083,12 +7301,6 @@ static bool dml_core_mode_support(struct 
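/*
 * The renamed dml2_pstate_change_* values above are assigned by the same
 * decision ladder as before: a positive vactive latency margin gives vactive
 * support, a reserved vblank time at least as long as the change latency gives
 * vblank support, and the per-plane override strategy can force a method.  A
 * compressed sketch of the auto-strategy branch for the DRAM clock change only
 * (the forced overrides are left out):
 */
#include <stdbool.h>
#include <stdio.h>

enum pstate_change { PSTATE_UNSUPPORTED, PSTATE_VACTIVE, PSTATE_VBLANK, PSTATE_VBLANK_AND_VACTIVE };

static enum pstate_change classify(double vactive_margin_us, double reserved_vblank_us,
				   double change_latency_us, bool all_blanked)
{
	if (all_blanked || (vactive_margin_us > 0 && reserved_vblank_us >= change_latency_us))
		return PSTATE_VBLANK_AND_VACTIVE;
	if (vactive_margin_us > 0)
		return PSTATE_VACTIVE;
	if (reserved_vblank_us >= change_latency_us)
		return PSTATE_VBLANK;
	return PSTATE_UNSUPPORTED;
}

int main(void)
{
	printf("%d\n", classify(12.0, 0.0, 350.0, false));   /* vactive */
	printf("%d\n", classify(-3.0, 400.0, 350.0, false)); /* vblank  */
	return 0;
}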
dml2_core_calcs_mode_support_ex *in_out for (k = 0; k < mode_lib->ms.num_active_planes; k++) dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); - - // dml2_printf_dml_policy(&mode_lib->ms.policy); - // dml2_printf_dml_display_cfg_timing(&display_cfg->timing, mode_lib->ms.num_active_planes); - // dml2_printf_dml_display_cfg_plane(&display_cfg->plane, mode_lib->ms.num_active_planes); - // dml2_printf_dml_display_cfg_surface(&display_cfg->surface, mode_lib->ms.num_active_planes); - // dml2_printf_dml_display_cfg_output(&display_cfg->output, mode_lib->ms.num_active_planes); #endif CalculateMaxDETAndMinCompressedBufferSize( @@ -7183,8 +7395,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - mode_lib->ms.SurfaceReadBandwidthLuma[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - mode_lib->ms.SurfaceReadBandwidthChroma[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + mode_lib->ms.vactive_sw_bw_l[k] = mode_lib->ms.SwathWidthYSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelY[k], 1.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->ms.vactive_sw_bw_c[k] = mode_lib->ms.SwathWidthCSingleDPP[k] * math_ceil2(mode_lib->ms.BytePerPixelC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; mode_lib->ms.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); @@ -7194,35 +7406,35 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out old_ReadBandwidthChroma = mode_lib->ms.SwathWidthYSingleDPP[k] / 2 * math_ceil2(mode_lib->ms.BytePerPixelInDETC[k], 2.0) / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio / 2.0; dml2_printf("DML::%s: 
k=%u, old_ReadBandwidthLuma = %f\n", __func__, k, old_ReadBandwidthLuma); dml2_printf("DML::%s: k=%u, old_ReadBandwidthChroma = %f\n", __func__, k, old_ReadBandwidthChroma); - dml2_printf("DML::%s: k=%u, ReadBandwidthLuma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%u, ReadBandwidthChroma = %f\n", __func__, k, mode_lib->ms.SurfaceReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_l[k]); + dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->ms.vactive_sw_bw_c[k]); #endif } // Writeback bandwidth for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_64) { - mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height - * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width - / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format == dml2_444_64) { + mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 8.0; - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { - mode_lib->ms.WriteBandwidth[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height - * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width - / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height + } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + mode_lib->ms.WriteBandwidth[k][0] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width + / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4.0; } else { - 
mode_lib->ms.WriteBandwidth[k] = 0.0; + mode_lib->ms.WriteBandwidth[k][0] = 0.0; } } /*Writeback Latency support check*/ mode_lib->ms.support.WritebackLatencySupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && - (mode_lib->ms.WriteBandwidth[k] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0 && + (mode_lib->ms.WriteBandwidth[k][0] > mode_lib->ip.writeback_interface_buffer_size_kbytes * 1024 / ((double)mode_lib->soc.qos_parameters.writeback.base_latency_us))) { mode_lib->ms.support.WritebackLatencySupport = false; } } @@ -7231,19 +7443,19 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out /* Writeback Scale Ratio and Taps Support Check */ mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = true; for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > mode_lib->ip.writeback_max_hscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > mode_lib->ip.writeback_max_vscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio < mode_lib->ip.writeback_min_hscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio < mode_lib->ip.writeback_min_vscl_ratio - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps - || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps % 2) == 1))) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > mode_lib->ip.writeback_max_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > mode_lib->ip.writeback_max_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio < 
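/*
 * With the writeback descriptor moved under writeback_stream[0], the per-stream
 * write bandwidth is still output pixels per input line time multiplied by the
 * pixel size (8 bytes for dml2_444_64, otherwise 4), and latency support holds
 * while that bandwidth fits the interface buffer drained over the base latency.
 * A standalone sketch with illustrative numbers:
 */
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	double out_w = 1920, out_h = 1080, in_h = 1080;
	double h_total = 2200, pixel_clock_mhz = 594.0;
	double bytes_per_pixel = 4.0;             /* non-444_64 formats */
	double buffer_kbytes = 56, base_latency_us = 12.0;

	double line_time_us = h_total / pixel_clock_mhz;
	double wr_bw = out_h * out_w / (in_h * line_time_us) * bytes_per_pixel; /* bytes/us */
	bool latency_ok = wr_bw <= buffer_kbytes * 1024.0 / base_latency_us;

	printf("writeback bw = %f bytes/us, latency_ok = %d\n", wr_bw, latency_ok);
	return 0;
}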
mode_lib->ip.writeback_min_hscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio < mode_lib->ip.writeback_min_vscl_ratio + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > (unsigned int) mode_lib->ip.writeback_max_hscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps > (unsigned int) mode_lib->ip.writeback_max_vscl_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps + || display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio > (unsigned int)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps + || (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps > 2.0 && ((display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps % 2) == 1))) { mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; } - if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { + if (2.0 * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height * (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps - 1) * 57 > mode_lib->ip.writeback_line_buffer_buffer_size) { mode_lib->ms.support.WritebackScaleRatioAndTapsSupport = false; } } @@ -7423,8 +7635,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculateSwathAndDETConfiguration_params->nomDETInKByte = mode_lib->ms.NomDETInKByte; CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.SurfaceReadBandwidthLuma; - CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.SurfaceReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->ms.vactive_sw_bw_l; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->ms.vactive_sw_bw_c; CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = mode_lib->ms.MaximumSwathWidthLuma; CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = mode_lib->ms.MaximumSwathWidthChroma; CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->ms.Read256BlockHeightY; @@ -7671,16 +7883,16 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out //DISPCLK/DPPCLK mode_lib->ms.WritebackRequiredDISPCLK = 0; for (k = 0; k < mode_lib->ms.num_active_planes; ++k) { - if 
(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { mode_lib->ms.WritebackRequiredDISPCLK = math_max2(mode_lib->ms.WritebackRequiredDISPCLK, - CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, + CalculateWriteBackDISPCLK(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000), - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total, mode_lib->ip.writeback_line_buffer_buffer_size)); } @@ -7712,7 +7924,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out if (!s->stream_visited[display_cfg->plane_descriptors[k].stream_index]) { s->stream_visited[display_cfg->plane_descriptors[k].stream_index] = 1; - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) s->TotalNumberOfActiveWriteback = s->TotalNumberOfActiveWriteback + 1; s->TotalNumberOfActiveOTG = s->TotalNumberOfActiveOTG + 1; @@ -8256,23 +8468,23 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.PSCL_FACTOR, mode_lib->ms.PSCL_FACTOR_CHROMA, mode_lib->ms.RequiredDPPCLK, - mode_lib->ms.SurfaceReadBandwidthLuma, - mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, mode_lib->soc.return_bus_width_bytes, /* Output */ &mode_lib->ms.dcfclk_deepsleep); for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if 
(display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { mode_lib->ms.WritebackDelayTime[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->ms.RequiredDISPCLK; } else { mode_lib->ms.WritebackDelayTime[k] = 0.0; @@ -8349,7 +8561,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out dml2_printf("DML::%s: mode_lib->ms.DCFCLK = %f\n", __func__, mode_lib->ms.DCFCLK); dml2_printf("DML::%s: mode_lib->ms.FabricClock = %f\n", __func__, mode_lib->ms.FabricClock); dml2_printf("DML::%s: mode_lib->ms.uclk_freq_mhz = %f\n", __func__, mode_lib->ms.uclk_freq_mhz); - dml2_printf("DML::%s: urgent latency tolerance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); + dml2_printf("DML::%s: urgent latency tolarance = %f\n", __func__, ((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 / (mode_lib->ms.DCFCLK * mode_lib->soc.return_bus_width_bytes))); #endif mode_lib->ms.support.OutstandingRequestsSupport = true; @@ -8367,6 +8579,13 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out + mode_lib->soc.qos_parameters.qos_params.dcn4x.average_transport_distance_fclk_cycles / mode_lib->ms.FabricClock) * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_average_transport_latency_margin / 100.0); + mode_lib->ms.support.max_non_urgent_latency_us + = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->ms.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles + / mode_lib->ms.uclk_freq_mhz * (1 + 
mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) + + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->ms.FabricClock + + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->ms.FabricClock + * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { @@ -8408,7 +8627,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } memset(calculate_mcache_setting_params, 0, sizeof(struct dml2_core_calcs_calculate_mcache_setting_params)); - if (mode_lib->soc.mall_allocated_for_dcn_mbytes == 0 || mode_lib->ip.dcn_mrq_present) { + if (mode_lib->soc.mcache_size_bytes == 0 || mode_lib->ip.dcn_mrq_present) { for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.mall_prefetch_sdp_overhead_factor[k] = 1.0; mode_lib->ms.mall_prefetch_dram_overhead_factor[k] = 1.0; @@ -8515,8 +8734,11 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out display_cfg->hostvm_enable, mode_lib->ms.MaxDCFCLK, mode_lib->ms.MaxFabricClock, +#ifdef DML_MODE_SUPPORT_USE_DPM_DRAM_BW + mode_lib->ms.dram_bw_mbps); +#else mode_lib->ms.max_dram_bw_mbps); - +#endif // Average BW support check calculate_avg_bandwidth_required( @@ -8524,8 +8746,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out // input display_cfg, mode_lib->ms.num_active_planes, - mode_lib->ms.SurfaceReadBandwidthLuma, - mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, mode_lib->ms.cursor_bw, mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, @@ -8638,9 +8860,32 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out &mode_lib->ms.ExtraLatency_sr, &mode_lib->ms.ExtraLatencyPrefetch); - { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) + s->impacted_dst_y_pre[k] = 0; + + s->recalc_prefetch_schedule = 0; + s->recalc_prefetch_done = 0; + do { mode_lib->ms.support.PrefetchSupported = true; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + s->lb_source_lines_c[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->ms.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + struct dml2_core_internal_DmlPipe *myPipe = 
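/*
 * The new max_non_urgent_latency_us above is composed from the DCN4.x QoS
 * table: the non-urgent UCLK cycle bound scaled by the UMC margin, plus the
 * MALL overhead and the worst round trip on the fabric scaled by the fabric
 * margin.  Sketch of the composition with placeholder parameter values:
 */
#include <stdio.h>

int main(void)
{
	double max_nonurgent_uclk_cycles = 1800.0, uclk_mhz = 1200.0, umc_margin_pct = 12.5;
	double mall_overhead_fclk_cycles = 50.0, max_round_trip_fclk_cycles = 900.0;
	double fclk_mhz = 1500.0, fabric_margin_pct = 10.0;

	double lat_us =
		max_nonurgent_uclk_cycles / uclk_mhz * (1.0 + umc_margin_pct / 100.0) +
		mall_overhead_fclk_cycles / fclk_mhz +
		max_round_trip_fclk_cycles / fclk_mhz * (1.0 + fabric_margin_pct / 100.0);

	printf("max_non_urgent_latency_us = %f\n", lat_us);
	return 0;
}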
&s->myPipe; mode_lib->ms.TWait[k] = CalculateTWait( @@ -8730,6 +8975,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->ms.meta_row_bytes[k]; CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->ms.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->ms.vactive_sw_bw_c[k]; // output CalculatePrefetchSchedule_params->DSTXAfterScaler = &s->DSTXAfterScaler[k]; @@ -8758,6 +9006,9 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out CalculatePrefetchSchedule_params->VUpdateWidthPix = &s->dummy_integer[1]; CalculatePrefetchSchedule_params->VReadyOffsetPix = &s->dummy_integer[2]; CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->ms.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; mode_lib->ms.NoTimeForPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); @@ -8789,7 +9040,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } mode_lib->ms.support.VRatioInPrefetchSupported = true; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { if (mode_lib->ms.VRatioPreY[k] > __DML2_CALCS_MAX_VRATIO_PRE__ || mode_lib->ms.VRatioPreC[k] > __DML2_CALCS_MAX_VRATIO_PRE__) { mode_lib->ms.support.VRatioInPrefetchSupported = false; @@ -8799,10 +9050,14 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } } + mode_lib->ms.support.PrefetchSupported &= mode_lib->ms.support.VRatioInPrefetchSupported; + + // By default, do not recalc prefetch schedule + s->recalc_prefetch_schedule = 0; + // Only do urg vs prefetch bandwidth check, flip schedule check, power saving feature support check IF the Prefetch Schedule Check is ok if (mode_lib->ms.support.PrefetchSupported) { - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - double line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { // Calculate Urgent burst factor for prefetch #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%d, Calling CalculateUrgentBurstFactor (for prefetch)\n", __func__, k); @@ -8815,7 +9070,7 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out mode_lib->ms.swath_width_chroma_ub[k], mode_lib->ms.SwathHeightY[k], mode_lib->ms.SwathHeightC[k], - line_time_us, + s->line_times[k], mode_lib->ms.UrgLatency, mode_lib->ms.VRatioPreY[k], mode_lib->ms.VRatioPreC[k], @@ -8852,8 +9107,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = 
mode_lib->ms.mall_prefetch_dram_overhead_factor; - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.SurfaceReadBandwidthLuma; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.SurfaceReadBandwidthChroma; + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; @@ -8899,127 +9154,164 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out } } +#ifdef DML_GLOBAL_PREFETCH_CHECK + if (mode_lib->ms.support.PrefetchSupported && mode_lib->ms.num_active_planes > 1 && s->recalc_prefetch_done == 0) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = mode_lib->ms.num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->ms.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->ms.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->ms.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->ms.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->ms.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = s->Tpre_rounded; + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = s->Tpre_oto; + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->ms.support.urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->ms.dst_y_prefetch; + if (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps < 10 * 1024) + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = 10 * 1024; + + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = (CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps / (double) mode_lib->soc.return_bus_width_bytes) / + ((double)mode_lib->soc.qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100.0); + + // if recalc_prefetch_schedule is set, recalculate the prefetch schedule with the new impacted_Tpre, prefetch should be possible + 
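/*
 * Before calling CheckGlobalPrefetchAdmissibility the estimated urgent
 * bandwidth is floored at 10*1024 MB/s and converted into an estimated DCFCLK
 * by dividing by the return bus width and the active-urgent derate; if the
 * check then asks for a recalculation, the outer do/while reruns the prefetch
 * schedule once with the impacted Tpre values.  Sketch of the conversion only,
 * with placeholder SoC numbers:
 */
#include <stdio.h>

int main(void)
{
	double urg_bw_mbps = 7000.0;              /* deliberately below the floor */
	double return_bus_width_bytes = 64.0;
	double dcfclk_derate_pct = 90.0;

	if (urg_bw_mbps < 10.0 * 1024.0)
		urg_bw_mbps = 10.0 * 1024.0;      /* clamp to the floor used above */

	double est_dcfclk_mhz = (urg_bw_mbps / return_bus_width_bytes) / (dcfclk_derate_pct / 100.0);
	printf("estimated dcfclk = %f MHz\n", est_dcfclk_mhz);
	return 0;
}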
CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->recalc_prefetch_schedule; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + mode_lib->ms.support.PrefetchSupported = CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); + s->recalc_prefetch_done = 1; + s->recalc_prefetch_schedule = 1; + } +#endif + } // prefetch schedule ok, do urg bw and flip schedule + } while (s->recalc_prefetch_schedule); - // Both prefetch schedule and BW okay - if (mode_lib->ms.support.PrefetchSupported == true && mode_lib->ms.support.VRatioInPrefetchSupported == true) { - mode_lib->ms.BandwidthAvailableForImmediateFlip = - get_bandwidth_available_for_immediate_flip( - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_qual, // no flip - mode_lib->ms.support.urg_bandwidth_available); - - mode_lib->ms.TotImmediateFlipBytes = 0; - for (k = 0; k < mode_lib->ms.num_active_planes; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip) { - s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( - s->HostVMInefficiencyFactor, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.meta_row_bytes[k]); - } else { - s->per_pipe_flip_bytes[k] = 0; - } - mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; + // Flip Schedule + // Both prefetch schedule and BW okay + if (mode_lib->ms.support.PrefetchSupported == true) { + mode_lib->ms.BandwidthAvailableForImmediateFlip = + get_bandwidth_available_for_immediate_flip( + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_qual, // no flip + mode_lib->ms.support.urg_bandwidth_available); - } + mode_lib->ms.TotImmediateFlipBytes = 0; + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip) { + s->per_pipe_flip_bytes[k] = get_pipe_flip_bytes( + s->HostVMInefficiencyFactor, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.meta_row_bytes[k]); + } else { + s->per_pipe_flip_bytes[k] = 0; + } + mode_lib->ms.TotImmediateFlipBytes += s->per_pipe_flip_bytes[k] * mode_lib->ms.NoOfDPP[k]; - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - CalculateFlipSchedule( - &mode_lib->scratch, - display_cfg->plane_descriptors[k].immediate_flip, - 1, // use_lb_flip_bw - s->HostVMInefficiencyFactor, - s->Tvm_trips_flip[k], - s->Tr0_trips_flip[k], - s->Tvm_trips_flip_rounded[k], - s->Tr0_trips_flip_rounded[k], - display_cfg->gpuvm_enable, - mode_lib->ms.vm_bytes[k], - mode_lib->ms.DPTEBytesPerRow[k], - mode_lib->ms.BandwidthAvailableForImmediateFlip, - mode_lib->ms.TotImmediateFlipBytes, - display_cfg->plane_descriptors[k].pixel_format, - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), - display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, - display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, - mode_lib->ms.Tno_bw_flip[k], - mode_lib->ms.dpte_row_height[k], - mode_lib->ms.dpte_row_height_chroma[k], - mode_lib->ms.use_one_row_for_frame_flip[k], - mode_lib->ip.max_flip_time_us, - mode_lib->ip.max_flip_time_lines, - s->per_pipe_flip_bytes[k], - mode_lib->ms.meta_row_bytes[k], - s->meta_row_height_luma[k], - s->meta_row_height_chroma[k], - mode_lib->ip.dcn_mrq_present && 
display_cfg->plane_descriptors[k].surface.dcc.enable, - - /* Output */ - &mode_lib->ms.dst_y_per_vm_flip[k], - &mode_lib->ms.dst_y_per_row_flip[k], - &mode_lib->ms.final_flip_bw[k], - &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); - } + } - calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; - calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; - calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; - calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; - calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; - - calculate_peak_bandwidth_params->display_cfg = display_cfg; - calculate_peak_bandwidth_params->inc_flip_bw = 1; - calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; - calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; - calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; - calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; - calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; - - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.SurfaceReadBandwidthLuma; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.SurfaceReadBandwidthChroma; - calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; - calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; - calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; - calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; - calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; - calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; - calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; - calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; - calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; - calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; - calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; - calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; - calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; - - 
calculate_peak_bandwidth_required( - &mode_lib->scratch, - calculate_peak_bandwidth_params); - - calculate_immediate_flip_bandwidth_support( - &s->dummy_single[0], // double* frac_urg_bandwidth_flip - &mode_lib->ms.support.ImmediateFlipSupport, - - dml2_core_internal_soc_state_sys_active, - mode_lib->ms.support.urg_bandwidth_required_flip, - mode_lib->ms.support.non_urg_bandwidth_required_flip, - mode_lib->ms.support.urg_bandwidth_available); - - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { - if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) - mode_lib->ms.support.ImmediateFlipSupport = false; - } + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { + CalculateFlipSchedule( + &mode_lib->scratch, + display_cfg->plane_descriptors[k].immediate_flip, + 1, // use_lb_flip_bw + s->HostVMInefficiencyFactor, + s->Tvm_trips_flip[k], + s->Tr0_trips_flip[k], + s->Tvm_trips_flip_rounded[k], + s->Tr0_trips_flip_rounded[k], + display_cfg->gpuvm_enable, + mode_lib->ms.vm_bytes[k], + mode_lib->ms.DPTEBytesPerRow[k], + mode_lib->ms.BandwidthAvailableForImmediateFlip, + mode_lib->ms.TotImmediateFlipBytes, + display_cfg->plane_descriptors[k].pixel_format, + (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)), + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, + mode_lib->ms.Tno_bw_flip[k], + mode_lib->ms.dpte_row_height[k], + mode_lib->ms.dpte_row_height_chroma[k], + mode_lib->ms.use_one_row_for_frame_flip[k], + mode_lib->ip.max_flip_time_us, + mode_lib->ip.max_flip_time_lines, + s->per_pipe_flip_bytes[k], + mode_lib->ms.meta_row_bytes[k], + s->meta_row_height_luma[k], + s->meta_row_height_chroma[k], + mode_lib->ip.dcn_mrq_present && display_cfg->plane_descriptors[k].surface.dcc.enable, + + /* Output */ + &mode_lib->ms.dst_y_per_vm_flip[k], + &mode_lib->ms.dst_y_per_row_flip[k], + &mode_lib->ms.final_flip_bw[k], + &mode_lib->ms.ImmediateFlipSupportedForPipe[k]); + } + + calculate_peak_bandwidth_params->urg_vactive_bandwidth_required = s->dummy_bw; + calculate_peak_bandwidth_params->urg_bandwidth_required = mode_lib->ms.support.urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->urg_bandwidth_required_qual = s->dummy_bw; + calculate_peak_bandwidth_params->non_urg_bandwidth_required = mode_lib->ms.support.non_urg_bandwidth_required_flip; + calculate_peak_bandwidth_params->surface_avg_vactive_required_bw = s->surface_dummy_bw; + calculate_peak_bandwidth_params->surface_peak_required_bw = mode_lib->ms.surface_peak_required_bw; + + calculate_peak_bandwidth_params->display_cfg = display_cfg; + calculate_peak_bandwidth_params->inc_flip_bw = 1; + calculate_peak_bandwidth_params->num_active_planes = mode_lib->ms.num_active_planes; + calculate_peak_bandwidth_params->num_of_dpp = mode_lib->ms.NoOfDPP; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_nom_overhead_factor_p1 = mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p0 = mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p0; + calculate_peak_bandwidth_params->dcc_dram_bw_pref_overhead_factor_p1 = 
mode_lib->ms.dcc_dram_bw_pref_overhead_factor_p1; + calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->ms.mall_prefetch_sdp_overhead_factor; + calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->ms.mall_prefetch_dram_overhead_factor; + + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->ms.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->ms.vactive_sw_bw_c; + calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->ms.RequiredPrefetchPixelDataBWLuma; + calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->ms.RequiredPrefetchPixelDataBWChroma; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->ms.excess_vactive_fill_bw_l; + calculate_peak_bandwidth_params->excess_vactive_fill_bw_c = mode_lib->ms.excess_vactive_fill_bw_c; + calculate_peak_bandwidth_params->cursor_bw = mode_lib->ms.cursor_bw; + calculate_peak_bandwidth_params->dpte_row_bw = mode_lib->ms.dpte_row_bw; + calculate_peak_bandwidth_params->meta_row_bw = mode_lib->ms.meta_row_bw; + calculate_peak_bandwidth_params->prefetch_cursor_bw = mode_lib->ms.prefetch_cursor_bw; + calculate_peak_bandwidth_params->prefetch_vmrow_bw = mode_lib->ms.prefetch_vmrow_bw; + calculate_peak_bandwidth_params->flip_bw = mode_lib->ms.final_flip_bw; + calculate_peak_bandwidth_params->urgent_burst_factor_l = mode_lib->ms.UrgentBurstFactorLuma; + calculate_peak_bandwidth_params->urgent_burst_factor_c = mode_lib->ms.UrgentBurstFactorChroma; + calculate_peak_bandwidth_params->urgent_burst_factor_cursor = mode_lib->ms.UrgentBurstFactorCursor; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_l = mode_lib->ms.UrgentBurstFactorLumaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_c = mode_lib->ms.UrgentBurstFactorChromaPre; + calculate_peak_bandwidth_params->urgent_burst_factor_prefetch_cursor = mode_lib->ms.UrgentBurstFactorCursorPre; + + calculate_peak_bandwidth_required( + &mode_lib->scratch, + calculate_peak_bandwidth_params); + + calculate_immediate_flip_bandwidth_support( + &s->dummy_single[0], // double* frac_urg_bandwidth_flip + &mode_lib->ms.support.ImmediateFlipSupport, + + dml2_core_internal_soc_state_sys_active, + mode_lib->ms.support.urg_bandwidth_required_flip, + mode_lib->ms.support.non_urg_bandwidth_required_flip, + mode_lib->ms.support.urg_bandwidth_available); - } else { // if prefetch not support, assume iflip is not supported too + for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + if (display_cfg->plane_descriptors[k].immediate_flip == true && mode_lib->ms.ImmediateFlipSupportedForPipe[k] == false) mode_lib->ms.support.ImmediateFlipSupport = false; - } - } // prefetch schedule + } + + } else { // if prefetch not support, assume iflip is not supported too + mode_lib->ms.support.ImmediateFlipSupport = false; } s->mSOCParameters.UrgentLatency = mode_lib->ms.UrgLatency; @@ -9116,8 +9408,8 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out s->pstate_bytes_required_c, mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p0, mode_lib->ms.dcc_dram_bw_nom_overhead_factor_p1, - mode_lib->ms.SurfaceReadBandwidthLuma, - mode_lib->ms.SurfaceReadBandwidthChroma, + mode_lib->ms.vactive_sw_bw_l, + mode_lib->ms.vactive_sw_bw_c, mode_lib->ms.surface_avg_vactive_required_bw, mode_lib->ms.surface_peak_required_bw, /* outputs */ @@ -9187,12 +9479,12 @@ static bool dml_core_mode_support(struct dml2_core_calcs_mode_support_ex *in_out 
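The reordered block above keeps the flip-schedule arithmetic intact: per-pipe flip bytes are accumulated into TotImmediateFlipBytes (scaled by the number of DPPs per plane), CalculateFlipSchedule() then produces a per-pipe verdict, and the global ImmediateFlipSupport flag is cleared if any plane that requested a flip cannot be serviced. A minimal standalone sketch of that decision follows, with simplified placeholder types rather than the DML2 structs, and with the separate accumulation and check loops folded into one for brevity:

#include <stdbool.h>

/* Simplified stand-ins for the DML2 per-plane state (hypothetical names). */
struct flip_plane {
	bool immediate_flip;       /* plane requested an immediate flip */
	bool flip_supported;       /* per-pipe result of the flip schedule */
	unsigned int flip_bytes;   /* per-pipe flip bytes (vm + dpte row + meta row) */
	unsigned int num_dpp;      /* DPPs driving this plane */
};

/* Accumulate total flip traffic and derive the global verdict.  Planes that
 * did not request a flip contribute zero bytes, matching the
 * per_pipe_flip_bytes[k] = 0 case in the hunk above. */
static bool immediate_flip_supported(const struct flip_plane *p,
				     unsigned int num_planes,
				     unsigned int *total_flip_bytes)
{
	bool support = true;
	unsigned int k;

	*total_flip_bytes = 0;
	for (k = 0; k < num_planes; k++) {
		if (p[k].immediate_flip)
			*total_flip_bytes += p[k].flip_bytes * p[k].num_dpp;
		if (p[k].immediate_flip && !p[k].flip_supported)
			support = false;
	}
	return support;
}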
dml2_printf("DML::%s: ModeSupport = %u\n", __func__, mode_lib->ms.support.ModeSupport); dml2_printf("DML::%s: ImmediateFlipSupport = %u\n", __func__, mode_lib->ms.support.ImmediateFlipSupport); - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.support.MPCCombineEnable[k] = mode_lib->ms.MPCCombine[k]; mode_lib->ms.support.DPPPerSurface[k] = mode_lib->ms.NoOfDPP[k]; } - for (k = 0; k <= mode_lib->ms.num_active_planes - 1; k++) { + for (k = 0; k < mode_lib->ms.num_active_planes; k++) { mode_lib->ms.support.ODMMode[k] = mode_lib->ms.ODMMode[k]; mode_lib->ms.support.DSCEnabled[k] = mode_lib->ms.RequiresDSC[k]; mode_lib->ms.support.FECEnabled[k] = mode_lib->ms.RequiresFEC[k]; @@ -9229,7 +9521,7 @@ unsigned int dml2_core_calcs_mode_support_ex(struct dml2_core_calcs_mode_support dml2_printf("DML::%s: is_mode_support = %u (min_clk_index=%d)\n", __func__, result, in_out_params->min_clk_index); for (unsigned int k = 0; k < in_out_params->in_display_cfg->num_planes; k++) - dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); + dml2_printf("DML::%s: plane_%d: reserved_vblank_time_ns = %u\n", __func__, k, in_out_params->in_display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns); dml2_printf("DML::%s: ------------- DONE ----------\n", __func__); @@ -9882,7 +10174,7 @@ static void CalculateStutterEfficiency(struct dml2_core_internal_scratch *scratc if (!dml_is_phantom_pipe(&p->display_cfg->plane_descriptors[k])) { if (!l->stream_visited[p->display_cfg->plane_descriptors[k].stream_index]) { - if (p->display_cfg->stream_descriptors[k].writeback.enable) + if (p->display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) l->TotalActiveWriteback = l->TotalActiveWriteback + 1; if (TotalNumberOfActiveOTG == 0) { // first otg @@ -9984,6 +10276,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params *CalculateSwathAndDETConfiguration_params = &mode_lib->scratch.CalculateSwathAndDETConfiguration_params; struct dml2_core_calcs_CalculateStutterEfficiency_params *CalculateStutterEfficiency_params = &mode_lib->scratch.CalculateStutterEfficiency_params; struct dml2_core_calcs_CalculatePrefetchSchedule_params *CalculatePrefetchSchedule_params = &mode_lib->scratch.CalculatePrefetchSchedule_params; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params *CheckGlobalPrefetchAdmissibility_params = &mode_lib->scratch.CheckGlobalPrefetchAdmissibility_params; struct dml2_core_calcs_calculate_mcache_setting_params *calculate_mcache_setting_params = &mode_lib->scratch.calculate_mcache_setting_params; struct dml2_core_calcs_calculate_tdlut_setting_params *calculate_tdlut_setting_params = &mode_lib->scratch.calculate_tdlut_setting_params; struct dml2_core_shared_CalculateMetaAndPTETimes_params *CalculateMetaAndPTETimes_params = &mode_lib->scratch.CalculateMetaAndPTETimes_params; @@ -10198,10 +10491,10 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { mode_lib->mp.cursor_bw[k] = display_cfg->plane_descriptors[k].cursor.num_cursors * display_cfg->plane_descriptors[k].cursor.cursor_width * display_cfg->plane_descriptors[k].cursor.cursor_bpp / 8.0 / 
((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)); - mode_lib->mp.SurfaceReadBandwidthLuma[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; - mode_lib->mp.SurfaceReadBandwidthChroma[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; - dml2_printf("DML::%s: ReadBandwidthSurfaceLuma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); - dml2_printf("DML::%s: ReadBandwidthSurfaceChroma[%i] = %fBps\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + mode_lib->mp.vactive_sw_bw_l[k] = mode_lib->mp.SwathWidthSingleDPPY[k] * mode_lib->mp.BytePerPixelY[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio; + mode_lib->mp.vactive_sw_bw_c[k] = mode_lib->mp.SwathWidthSingleDPPC[k] * mode_lib->mp.BytePerPixelC[k] / (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio; + dml2_printf("DML::%s: vactive_sw_bw_l[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + dml2_printf("DML::%s: vactive_sw_bw_c[%i] = %fBps\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); } CalculateSwathAndDETConfiguration_params->display_cfg = display_cfg; @@ -10217,8 +10510,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateSwathAndDETConfiguration_params->nomDETInKByte = s->NomDETInKByte; CalculateSwathAndDETConfiguration_params->ConfigReturnBufferSegmentSizeInkByte = mode_lib->ip.config_return_buffer_segment_size_in_kbytes; CalculateSwathAndDETConfiguration_params->CompressedBufferSegmentSizeInkByte = mode_lib->ip.compressed_buffer_segment_size_in_kbytes; - CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.SurfaceReadBandwidthLuma; - CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.SurfaceReadBandwidthChroma; + CalculateSwathAndDETConfiguration_params->ReadBandwidthLuma = mode_lib->mp.vactive_sw_bw_l; + CalculateSwathAndDETConfiguration_params->ReadBandwidthChroma = mode_lib->mp.vactive_sw_bw_c; CalculateSwathAndDETConfiguration_params->MaximumSwathWidthLuma = s->dummy_single_array[0]; CalculateSwathAndDETConfiguration_params->MaximumSwathWidthChroma = s->dummy_single_array[1]; CalculateSwathAndDETConfiguration_params->Read256BytesBlockHeightY = mode_lib->mp.Read256BlockHeightY; @@ -10583,17 +10876,17 @@ static 
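The rename above is mechanical: mp.SurfaceReadBandwidthLuma/Chroma become mp.vactive_sw_bw_l/c while the per-plane formula is unchanged, i.e. the bytes fetched per swath line divided by the line time and scaled by the vertical ratio. A small self-contained sketch of that calculation, using simplified placeholder parameter names rather than the DML2 fields:

/* Per-plane active-read ("vactive software") bandwidth, as computed above. */
static double vactive_sw_bw(double swath_width_single_dpp, /* pixels */
			    double bytes_per_pixel,
			    double h_total,                 /* pixels per scan line */
			    double pixel_clock_khz,
			    double v_ratio)
{
	/* line time in microseconds: h_total divided by the pixel clock in MHz */
	double line_time_us = h_total / (pixel_clock_khz / 1000.0);

	/* bytes needed per line, delivered once per line time, scaled by v_ratio */
	return swath_width_single_dpp * bytes_per_pixel / line_time_us * v_ratio;
}

For an illustrative 1080p plane (swath width 1920, 4 bytes per pixel, h_total 2200, 148500 kHz pixel clock, v_ratio 1.0) this gives roughly 518 bytes per microsecond; the numbers are only an example, not values from this patch.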
bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.TCalc = 24.0 / mode_lib->mp.DCFCLKDeepSleep; for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { mode_lib->mp.WritebackDelay[k] = mode_lib->soc.qos_parameters.writeback.base_latency_us + CalculateWriteBackDelay( - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.h_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_ratio, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.v_taps, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height, - display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].pixel_format, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].h_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_ratio, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].v_taps, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_width, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].output_height, + display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.writeback_stream[0].input_height, display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total) / mode_lib->mp.Dispclk; } else mode_lib->mp.WritebackDelay[k] = 0; @@ -10679,10 +10972,25 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex for (k = 0; k < s->num_active_planes; ++k) { bool cursor_not_enough_urgent_latency_hiding = 0; - double line_time_us = 0.0; - - line_time_us = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / + s->line_times[k] = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000); + + s->pixel_format[k] = display_cfg->plane_descriptors[k].pixel_format; + + s->lb_source_lines_l[k] = get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->mp.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane0.width, + display_cfg->plane_descriptors[k].composition.viewport.plane0.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane0.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + + s->lb_source_lines_c[k] = 
get_num_lb_source_lines(mode_lib->ip.max_line_buffer_lines, mode_lib->ip.line_buffer_size_bits, + mode_lib->mp.NoOfDPP[k], + display_cfg->plane_descriptors[k].composition.viewport.plane1.width, + display_cfg->plane_descriptors[k].composition.viewport.plane1.height, + display_cfg->plane_descriptors[k].composition.scaler_info.plane1.h_ratio, + display_cfg->plane_descriptors[k].composition.rotation_angle); + if (display_cfg->plane_descriptors[k].cursor.num_cursors > 0) { calculate_cursor_req_attributes( display_cfg->plane_descriptors[k].cursor.cursor_width, @@ -10699,7 +11007,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex display_cfg->plane_descriptors[k].cursor.cursor_width, s->cursor_bytes_per_chunk[k], s->cursor_lines_per_chunk[k], - line_time_us, + s->line_times[k], mode_lib->mp.UrgentLatency, // output @@ -10714,7 +11022,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex mode_lib->mp.swath_width_chroma_ub[k], mode_lib->mp.SwathHeightY[k], mode_lib->mp.SwathHeightC[k], - line_time_us, + s->line_times[k], mode_lib->mp.UrgentLatency, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio, display_cfg->plane_descriptors[k].composition.scaler_info.plane1.v_ratio, @@ -10752,6 +11060,35 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex dml2_printf("DML::%s: immediate_flip_required = %u\n", __func__, s->immediate_flip_required); #endif + if (s->num_active_planes > 1) { + CheckGlobalPrefetchAdmissibility_params->num_active_planes = s->num_active_planes; + CheckGlobalPrefetchAdmissibility_params->pixel_format = s->pixel_format; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_l = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->chunk_bytes_c = mode_lib->ip.pixel_chunk_size_kbytes * 1024; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_l = s->lb_source_lines_l; + CheckGlobalPrefetchAdmissibility_params->lb_source_lines_c = s->lb_source_lines_c; + CheckGlobalPrefetchAdmissibility_params->swath_height_l = mode_lib->mp.SwathHeightY; + CheckGlobalPrefetchAdmissibility_params->swath_height_c = mode_lib->mp.SwathHeightC; + CheckGlobalPrefetchAdmissibility_params->rob_buffer_size_kbytes = mode_lib->ip.rob_buffer_size_kbytes; + CheckGlobalPrefetchAdmissibility_params->compressed_buffer_size_kbytes = mode_lib->mp.CompressedBufferSizeInkByte; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_l = mode_lib->mp.DETBufferSizeY; + CheckGlobalPrefetchAdmissibility_params->detile_buffer_size_bytes_c = mode_lib->mp.DETBufferSizeC; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_l = s->full_swath_bytes_l; + CheckGlobalPrefetchAdmissibility_params->full_swath_bytes_c = s->full_swath_bytes_c; + CheckGlobalPrefetchAdmissibility_params->prefetch_sw_bytes = s->prefetch_sw_bytes; + CheckGlobalPrefetchAdmissibility_params->Tpre_rounded = 0; // don't care + CheckGlobalPrefetchAdmissibility_params->Tpre_oto = 0; // don't care + CheckGlobalPrefetchAdmissibility_params->estimated_urg_bandwidth_required_mbps = mode_lib->mp.urg_bandwidth_available[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]; + CheckGlobalPrefetchAdmissibility_params->estimated_dcfclk_mhz = mode_lib->mp.Dcfclk; + CheckGlobalPrefetchAdmissibility_params->line_time = s->line_times; + CheckGlobalPrefetchAdmissibility_params->dst_y_prefetch = mode_lib->mp.dst_y_prefetch; + + // if recalc_prefetch_schedule is set, recalculate the 
prefetch schedule with the new impacted_Tpre, prefetch should be possible + CheckGlobalPrefetchAdmissibility_params->recalc_prefetch_schedule = &s->dummy_boolean[0]; + CheckGlobalPrefetchAdmissibility_params->impacted_dst_y_pre = s->impacted_dst_y_pre; + CheckGlobalPrefetchAdmissibility(&mode_lib->scratch, CheckGlobalPrefetchAdmissibility_params); // dont care about the check output for mode programming + } + { s->DestinationLineTimesForPrefetchLessThan2 = false; s->VRatioPrefetchMoreThanMax = false; @@ -10763,11 +11100,11 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex dml2_printf("DML::%s: k=%d MaxVStartupLines = %u\n", __func__, k, s->MaxVStartupLines[k]); mode_lib->mp.TWait[k] = CalculateTWait( - display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, - mode_lib->mp.UrgentLatency, - mode_lib->mp.TripToMemory, - !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? - get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); + display_cfg->plane_descriptors[k].overrides.reserved_vblank_time_ns, + mode_lib->mp.UrgentLatency, + mode_lib->mp.TripToMemory, + !dml_is_phantom_pipe(&display_cfg->plane_descriptors[k]) && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.drr_config.enabled ? + get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index) : 0.0); myPipe->Dppclk = mode_lib->mp.Dppclk[k]; myPipe->Dispclk = mode_lib->mp.Dispclk; @@ -10848,6 +11185,9 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->mrq_present = mode_lib->ip.dcn_mrq_present; CalculatePrefetchSchedule_params->meta_row_bytes = mode_lib->mp.meta_row_bytes[k]; CalculatePrefetchSchedule_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor[k]; + CalculatePrefetchSchedule_params->impacted_dst_y_pre = s->impacted_dst_y_pre[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_l = mode_lib->mp.vactive_sw_bw_l[k]; + CalculatePrefetchSchedule_params->vactive_sw_bw_c = mode_lib->mp.vactive_sw_bw_c[k]; // output CalculatePrefetchSchedule_params->DSTXAfterScaler = &mode_lib->mp.DSTXAfterScaler[k]; @@ -10876,9 +11216,17 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculatePrefetchSchedule_params->VUpdateWidthPix = &mode_lib->mp.VUpdateWidthPix[k]; CalculatePrefetchSchedule_params->VReadyOffsetPix = &mode_lib->mp.VReadyOffsetPix[k]; CalculatePrefetchSchedule_params->prefetch_cursor_bw = &mode_lib->mp.prefetch_cursor_bw[k]; + CalculatePrefetchSchedule_params->prefetch_sw_bytes = &s->prefetch_sw_bytes[k]; + CalculatePrefetchSchedule_params->Tpre_rounded = &s->Tpre_rounded[k]; + CalculatePrefetchSchedule_params->Tpre_oto = &s->Tpre_oto[k]; mode_lib->mp.NoTimeToPrefetch[k] = CalculatePrefetchSchedule(&mode_lib->scratch, CalculatePrefetchSchedule_params); + if (s->impacted_dst_y_pre[k] > 0) + mode_lib->mp.impacted_prefetch_margin_us[k] = (mode_lib->mp.dst_y_prefetch[k] - s->impacted_dst_y_pre[k]) * s->line_times[k]; + else + mode_lib->mp.impacted_prefetch_margin_us[k] = 0; + #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%0u NoTimeToPrefetch=%0d\n", __func__, k, mode_lib->mp.NoTimeToPrefetch[k]); #endif @@ -10956,8 +11304,8 @@ static bool 
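The margin bookkeeping added above follows directly from the values shown: when CheckGlobalPrefetchAdmissibility() reports a non-zero impacted_dst_y_pre for a plane, the remaining headroom is the scheduled prefetch in lines minus the impacted requirement, converted to microseconds through the line time; otherwise the margin is zero. A minimal sketch with placeholder parameter names:

/* Per-plane impacted prefetch margin, mirroring the calculation above.
 * A zero (or negative) impacted_dst_y_pre means the plane is not impacted. */
static double impacted_prefetch_margin_us(double dst_y_prefetch,    /* lines */
					  double impacted_dst_y_pre, /* lines */
					  double line_time_us)
{
	if (impacted_dst_y_pre <= 0.0)
		return 0.0;

	return (dst_y_prefetch - impacted_dst_y_pre) * line_time_us;
}

Further down in this patch the smallest of these per-plane margins is exported as informative.misc.LowestPrefetchMargin.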
dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex dml2_printf("DML::%s: k=%0u VRatioY=%f\n", __func__, k, display_cfg->plane_descriptors[k].composition.scaler_info.plane0.v_ratio); dml2_printf("DML::%s: k=%0u prefetch_vmrow_bw=%f\n", __func__, k, mode_lib->mp.prefetch_vmrow_bw[k]); - dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceLuma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%0u ReadBandwidthSurfaceChroma=%f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%0u vactive_sw_bw_l=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + dml2_printf("DML::%s: k=%0u vactive_sw_bw_c=%f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); dml2_printf("DML::%s: k=%0u cursor_bw=%f\n", __func__, k, mode_lib->mp.cursor_bw[k]); dml2_printf("DML::%s: k=%0u dpte_row_bw=%f\n", __func__, k, mode_lib->mp.dpte_row_bw[k]); dml2_printf("DML::%s: k=%0u meta_row_bw=%f\n", __func__, k, mode_lib->mp.meta_row_bw[k]); @@ -10988,8 +11336,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.SurfaceReadBandwidthLuma; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.SurfaceReadBandwidthChroma; + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; @@ -11120,8 +11468,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex calculate_peak_bandwidth_params->mall_prefetch_sdp_overhead_factor = mode_lib->mp.mall_prefetch_sdp_overhead_factor; calculate_peak_bandwidth_params->mall_prefetch_dram_overhead_factor = mode_lib->mp.mall_prefetch_dram_overhead_factor; - calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.SurfaceReadBandwidthLuma; - calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.SurfaceReadBandwidthChroma; + calculate_peak_bandwidth_params->surface_read_bandwidth_l = mode_lib->mp.vactive_sw_bw_l; + calculate_peak_bandwidth_params->surface_read_bandwidth_c = mode_lib->mp.vactive_sw_bw_c; calculate_peak_bandwidth_params->prefetch_bandwidth_l = mode_lib->mp.RequiredPrefetchPixelDataBWLuma; calculate_peak_bandwidth_params->prefetch_bandwidth_c = mode_lib->mp.RequiredPrefetchPixelDataBWChroma; calculate_peak_bandwidth_params->excess_vactive_fill_bw_l = mode_lib->mp.excess_vactive_fill_bw_l; @@ -11238,8 +11586,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex s->mmSOCParameters.USRRetrainingLatency = 0; s->mmSOCParameters.SMNLatency = 0; s->mmSOCParameters.g6_temp_read_blackout_us = get_g6_temp_read_blackout_us(&mode_lib->soc, (unsigned int)(mode_lib->mp.uclk_freq_mhz * 1000), in_out_params->min_clk_index); - s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, 
mode_lib->ms.uclk_freq_mhz, mode_lib->ms.FabricClock, in_out_params->min_clk_index); - s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->ms.FabricClock; + s->mmSOCParameters.max_urgent_latency_us = get_max_urgent_latency_us(&mode_lib->soc.qos_parameters.qos_params.dcn4x, mode_lib->mp.uclk_freq_mhz, mode_lib->mp.FabricClock, in_out_params->min_clk_index); + s->mmSOCParameters.df_response_time_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.df_qos_response_time_fclk_cycles / mode_lib->mp.FabricClock; s->mmSOCParameters.qos_type = mode_lib->soc.qos_parameters.qos_type; CalculateWatermarks_params->display_cfg = display_cfg; @@ -11289,7 +11637,7 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(&mode_lib->scratch, CalculateWatermarks_params); for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { + if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.active_writebacks_per_stream > 0) { mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000) - mode_lib->mp.Watermark.WritebackDRAMClockChangeWatermark); mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k] = math_max2(0, mode_lib->mp.VStartupMin[k] * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total / @@ -11475,25 +11823,25 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex //Maximum Bandwidth Used s->TotalWRBandwidth = 0; - s->WRBandwidth = 0; - for (k = 0; k < s->num_active_planes; ++k) { - if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true && display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.pixel_format == dml2_444_32) { - s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 1000)) * 4; - } else if (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.enable == true) { - s->WRBandwidth = display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_height * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.output_width / - (display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.h_total * display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].writeback.scaling_info.input_height / ((double)display_cfg->stream_descriptors[display_cfg->plane_descriptors[k].stream_index].timing.pixel_clock_khz / 
1000)) * 8; + for (k = 0; k < display_cfg->num_streams; ++k) { + s->WRBandwidth = 0; + if (display_cfg->stream_descriptors[k].writeback.active_writebacks_per_stream > 0) { + s->WRBandwidth = display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_height + * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].output_width / + (display_cfg->stream_descriptors[k].timing.h_total * display_cfg->stream_descriptors[k].writeback.writeback_stream[0].input_height + / ((double)display_cfg->stream_descriptors[k].timing.pixel_clock_khz / 1000)) + * (display_cfg->stream_descriptors[k].writeback.writeback_stream[0].pixel_format == dml2_444_32 ? 4.0 : 8.0); + s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; } - s->TotalWRBandwidth = s->TotalWRBandwidth + s->WRBandwidth; } mode_lib->mp.TotalDataReadBandwidth = 0; for (k = 0; k < s->num_active_planes; ++k) { - mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.SurfaceReadBandwidthLuma[k] + mode_lib->mp.SurfaceReadBandwidthChroma[k]; + mode_lib->mp.TotalDataReadBandwidth = mode_lib->mp.TotalDataReadBandwidth + mode_lib->mp.vactive_sw_bw_l[k] + mode_lib->mp.vactive_sw_bw_c[k]; #ifdef __DML_VBA_DEBUG__ dml2_printf("DML::%s: k=%u, TotalDataReadBandwidth = %f\n", __func__, k, mode_lib->mp.TotalDataReadBandwidth); - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceLuma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthLuma[k]); - dml2_printf("DML::%s: k=%u, ReadBandwidthSurfaceChroma = %f\n", __func__, k, mode_lib->mp.SurfaceReadBandwidthChroma[k]); + dml2_printf("DML::%s: k=%u, vactive_sw_bw_l = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_l[k]); + dml2_printf("DML::%s: k=%u, vactive_sw_bw_c = %f\n", __func__, k, mode_lib->mp.vactive_sw_bw_c[k]); #endif } @@ -11530,8 +11878,8 @@ static bool dml_core_mode_programming(struct dml2_core_calcs_mode_programming_ex CalculateStutterEfficiency_params->BlockWidth256BytesC = mode_lib->mp.Read256BlockWidthC; CalculateStutterEfficiency_params->DCCYMaxUncompressedBlock = mode_lib->mp.DCCYMaxUncompressedBlock; CalculateStutterEfficiency_params->DCCCMaxUncompressedBlock = mode_lib->mp.DCCCMaxUncompressedBlock; - CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.SurfaceReadBandwidthLuma; - CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.SurfaceReadBandwidthChroma; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceLuma = mode_lib->mp.vactive_sw_bw_l; + CalculateStutterEfficiency_params->ReadBandwidthSurfaceChroma = mode_lib->mp.vactive_sw_bw_c; CalculateStutterEfficiency_params->dpte_row_bw = mode_lib->mp.dpte_row_bw; CalculateStutterEfficiency_params->meta_row_bw = mode_lib->mp.meta_row_bw; CalculateStutterEfficiency_params->rob_alloc_compressed = mode_lib->ip.dcn_mrq_present; @@ -11742,7 +12090,7 @@ static void rq_dlg_get_wm_regs(const struct dml2_display_cfg *display_cfg, const wm_regs->fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); wm_regs->sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); wm_regs->sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + wm_regs->temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); wm_regs->uclk_pstate = (int 
unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); wm_regs->urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); wm_regs->usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); @@ -12321,14 +12669,18 @@ void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_interna void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, - struct dmub_fams2_stream_static_state *fams2_programming, - enum dml2_uclk_pstate_support_method pstate_method, + union dmub_cmd_fams2_config *fams2_base_programming, + union dmub_cmd_fams2_config *fams2_sub_programming, + enum dml2_pstate_method pstate_method, int plane_index) { const struct dml2_plane_parameters *plane_descriptor = &display_cfg->display_config.plane_descriptors[plane_index]; const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[plane_descriptor->stream_index]; const struct dml2_fams2_meta *stream_fams2_meta = &display_cfg->stage3.stream_fams2_meta[plane_descriptor->stream_index]; + struct dmub_fams2_cmd_stream_static_base_state *base_programming = &fams2_base_programming->stream_v1.base; + union dmub_fams2_cmd_stream_static_sub_state *sub_programming = &fams2_sub_programming->stream_v1.sub_state; + unsigned int i; if (display_cfg->display_config.overrides.all_streams_blanked) { @@ -12337,110 +12689,110 @@ void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_interna } /* from display configuration */ - fams2_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; - fams2_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; - fams2_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal - + base_programming->htotal = (uint16_t)stream_descriptor->timing.h_total; + base_programming->vtotal = (uint16_t)stream_descriptor->timing.v_total; + base_programming->vblank_start = (uint16_t)(stream_fams2_meta->nom_vtotal - stream_descriptor->timing.v_front_porch); - fams2_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal - + base_programming->vblank_end = (uint16_t)(stream_fams2_meta->nom_vtotal - stream_descriptor->timing.v_front_porch - stream_descriptor->timing.v_active); - fams2_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; + base_programming->config.bits.is_drr = stream_descriptor->timing.drr_config.enabled; /* from meta */ - fams2_programming->otg_vline_time_ns = + base_programming->otg_vline_time_ns = (unsigned int)(stream_fams2_meta->otg_vline_time_us * 1000.0); - fams2_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines; - fams2_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines; - fams2_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines; - fams2_programming->drr_keepout_otg_vline = (uint16_t)(stream_fams2_meta->nom_vtotal - + base_programming->scheduling_delay_otg_vlines = (uint8_t)stream_fams2_meta->scheduling_delay_otg_vlines; + base_programming->contention_delay_otg_vlines = (uint8_t)stream_fams2_meta->contention_delay_otg_vlines; + base_programming->vline_int_ack_delay_otg_vlines = (uint8_t)stream_fams2_meta->vertical_interrupt_ack_delay_otg_vlines; + base_programming->drr_keepout_otg_vline = 
(uint16_t)(stream_fams2_meta->nom_vtotal - stream_descriptor->timing.v_front_porch - stream_fams2_meta->method_drr.programming_delay_otg_vlines); - fams2_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines; - fams2_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal; + base_programming->allow_to_target_delay_otg_vlines = (uint8_t)stream_fams2_meta->allow_to_target_delay_otg_vlines; + base_programming->max_vtotal = (uint16_t)stream_fams2_meta->max_vtotal; /* from core */ - fams2_programming->config.bits.min_ttu_vblank_usable = true; + base_programming->config.bits.min_ttu_vblank_usable = true; for (i = 0; i < display_cfg->display_config.num_planes; i++) { /* check if all planes support p-state in blank */ if (display_cfg->display_config.plane_descriptors[i].stream_index == plane_descriptor->stream_index && mode_lib->mp.MinTTUVBlank[i] <= mode_lib->mp.Watermark.DRAMClockChangeWatermark) { - fams2_programming->config.bits.min_ttu_vblank_usable = false; + base_programming->config.bits.min_ttu_vblank_usable = false; break; } } switch (pstate_method) { - case dml2_uclk_pstate_support_method_vactive: - case dml2_uclk_pstate_support_method_fw_vactive_drr: + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: /* legacy vactive */ - fams2_programming->type = FAMS2_STREAM_TYPE_VACTIVE; - fams2_programming->sub_state.legacy.vactive_det_fill_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; - fams2_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline; - fams2_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline; - fams2_programming->config.bits.clamp_vtotal_min = true; + base_programming->type = FAMS2_STREAM_TYPE_VACTIVE; + sub_programming->legacy.vactive_det_fill_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines; + base_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_vactive.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_vactive.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; break; - case dml2_uclk_pstate_support_method_vblank: - case dml2_uclk_pstate_support_method_fw_vblank_drr: + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: /* legacy vblank */ - fams2_programming->type = FAMS2_STREAM_TYPE_VBLANK; - fams2_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline; - fams2_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline; - fams2_programming->config.bits.clamp_vtotal_min = true; + base_programming->type = FAMS2_STREAM_TYPE_VBLANK; + base_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_vblank.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_vblank.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; break; - case dml2_uclk_pstate_support_method_fw_drr: + case dml2_pstate_method_fw_drr: /* drr */ - fams2_programming->type = FAMS2_STREAM_TYPE_DRR; - fams2_programming->sub_state.drr.programming_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines; - 
fams2_programming->sub_state.drr.nom_stretched_vtotal = - (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal; - fams2_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline; - fams2_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline; + base_programming->type = FAMS2_STREAM_TYPE_DRR; + sub_programming->drr.programming_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_drr.programming_delay_otg_vlines; + sub_programming->drr.nom_stretched_vtotal = + (uint16_t)stream_fams2_meta->method_drr.stretched_vtotal; + base_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_drr.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_drr.common.allow_end_otg_vline; /* drr only clamps to vtotal min for single display */ - fams2_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; - fams2_programming->sub_state.drr.only_stretch_if_required = true; + base_programming->config.bits.clamp_vtotal_min = display_cfg->display_config.num_streams == 1; + sub_programming->drr.only_stretch_if_required = true; break; - case dml2_uclk_pstate_support_method_fw_subvp_phantom: - case dml2_uclk_pstate_support_method_fw_subvp_phantom_drr: + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: /* subvp */ - fams2_programming->type = FAMS2_STREAM_TYPE_SUBVP; - fams2_programming->sub_state.subvp.vratio_numerator = - (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); - fams2_programming->sub_state.subvp.vratio_denominator = 1000; - fams2_programming->sub_state.subvp.programming_delay_otg_vlines = - (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines; - fams2_programming->sub_state.subvp.prefetch_to_mall_otg_vlines = - (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; - fams2_programming->sub_state.subvp.phantom_vtotal = - (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal; - fams2_programming->sub_state.subvp.phantom_vactive = - (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive; - fams2_programming->sub_state.subvp.config.bits.is_multi_planar = - plane_descriptor->surface.plane1.height > 0; - fams2_programming->sub_state.subvp.config.bits.is_yuv420 = - plane_descriptor->pixel_format == dml2_420_8 || - plane_descriptor->pixel_format == dml2_420_10 || - plane_descriptor->pixel_format == dml2_420_12; - - fams2_programming->allow_start_otg_vline = - (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline; - fams2_programming->allow_end_otg_vline = - (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline; - fams2_programming->config.bits.clamp_vtotal_min = true; + base_programming->type = FAMS2_STREAM_TYPE_SUBVP; + sub_programming->subvp.vratio_numerator = + (uint16_t)(plane_descriptor->composition.scaler_info.plane0.v_ratio * 1000.0); + sub_programming->subvp.vratio_denominator = 1000; + sub_programming->subvp.programming_delay_otg_vlines = + (uint8_t)stream_fams2_meta->method_subvp.programming_delay_otg_vlines; + sub_programming->subvp.prefetch_to_mall_otg_vlines = + (uint8_t)stream_fams2_meta->method_subvp.prefetch_to_mall_delay_otg_vlines; + sub_programming->subvp.phantom_vtotal = + (uint16_t)stream_fams2_meta->method_subvp.phantom_vtotal; + sub_programming->subvp.phantom_vactive = + (uint16_t)stream_fams2_meta->method_subvp.phantom_vactive; + 
sub_programming->subvp.config.bits.is_multi_planar = + plane_descriptor->surface.plane1.height > 0; + sub_programming->subvp.config.bits.is_yuv420 = + plane_descriptor->pixel_format == dml2_420_8 || + plane_descriptor->pixel_format == dml2_420_10 || + plane_descriptor->pixel_format == dml2_420_12; + + base_programming->allow_start_otg_vline = + (uint16_t)stream_fams2_meta->method_subvp.common.allow_start_otg_vline; + base_programming->allow_end_otg_vline = + (uint16_t)stream_fams2_meta->method_subvp.common.allow_end_otg_vline; + base_programming->config.bits.clamp_vtotal_min = true; break; - case dml2_uclk_pstate_support_method_reserved_hw: - case dml2_uclk_pstate_support_method_reserved_fw: - case dml2_uclk_pstate_support_method_reserved_fw_drr_fixed: - case dml2_uclk_pstate_support_method_reserved_fw_drr_var: - case dml2_uclk_pstate_support_method_not_supported: - case dml2_uclk_pstate_support_method_count: + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_na: + case dml2_pstate_method_count: default: /* this should never happen */ break; @@ -12569,6 +12921,8 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.mode_support_info.InvalidCombinationOfMALLUseForPState = mode_lib->ms.support.InvalidCombinationOfMALLUseForPState; out->informative.mode_support_info.ExceededMALLSize = mode_lib->ms.support.ExceededMALLSize; out->informative.mode_support_info.EnoughWritebackUnits = mode_lib->ms.support.EnoughWritebackUnits; + out->informative.mode_support_info.temp_read_or_ppt_support = mode_lib->ms.support.temp_read_or_ppt_support; + out->informative.mode_support_info.g6_temp_read_support = mode_lib->ms.support.g6_temp_read_support; out->informative.mode_support_info.ExceededMultistreamSlots = mode_lib->ms.support.ExceededMultistreamSlots; out->informative.mode_support_info.NotEnoughDSCUnits = mode_lib->ms.support.NotEnoughDSCUnits; @@ -12662,7 +13016,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.watermarks.pstate_change_us = dml_get_wm_dram_clock_change(mode_lib); out->informative.watermarks.fclk_pstate_change_us = dml_get_wm_fclk_change(mode_lib); out->informative.watermarks.usr_retraining_us = dml_get_wm_usr_retraining(mode_lib); - out->informative.watermarks.g6_temp_read_watermark_us = dml_get_wm_g6_temp_read(mode_lib); + out->informative.watermarks.temp_read_or_ppt_watermark_us = dml_get_wm_temp_read_or_ppt(mode_lib); out->informative.mall.total_surface_size_in_mall_bytes = 0; for (k = 0; k < out->display_config.num_planes; ++k) @@ -12745,6 +13099,8 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.qos.max_active_fclk_change_latency_supported = dml_get_fclk_change_latency(mode_lib); + out->informative.misc.LowestPrefetchMargin = 10 * 1000 * 1000; + for (k = 0; k < out->display_config.num_planes; k++) { if ((out->display_config.plane_descriptors->overrides.reserved_vblank_time_ns >= 1000.0 * mode_lib->soc.power_management_parameters.dram_clk_change_blackout_us) @@ -12824,6 +13180,7 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.DisplayPipeLineDeliveryTimeLumaPrefetch[k] = mode_lib->mp.DisplayPipeLineDeliveryTimeLumaPrefetch[k]; out->informative.misc.DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 
mode_lib->mp.DisplayPipeLineDeliveryTimeChromaPrefetch[k]; + out->informative.misc.WritebackRequiredBandwidth = mode_lib->scratch.dml_core_mode_programming_locals.TotalWRBandwidth / 1000.0; out->informative.misc.WritebackAllowDRAMClockChangeEndPosition[k] = mode_lib->mp.WritebackAllowDRAMClockChangeEndPosition[k]; out->informative.misc.WritebackAllowFCLKChangeEndPosition[k] = mode_lib->mp.WritebackAllowFCLKChangeEndPosition[k]; out->informative.misc.DSCCLK_calculated[k] = mode_lib->mp.DSCCLK[k]; @@ -12831,6 +13188,9 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.misc.PTE_BUFFER_MODE[k] = mode_lib->mp.PTE_BUFFER_MODE[k]; out->informative.misc.DSCDelay[k] = mode_lib->mp.DSCDelay[k]; out->informative.misc.MaxActiveDRAMClockChangeLatencySupported[k] = mode_lib->mp.MaxActiveDRAMClockChangeLatencySupported[k]; + + if (mode_lib->mp.impacted_prefetch_margin_us[k] < out->informative.misc.LowestPrefetchMargin) + out->informative.misc.LowestPrefetchMargin = mode_lib->mp.impacted_prefetch_margin_us[k]; } // For this DV informative layer, all pipes in the same planes will just use the same id @@ -12853,16 +13213,11 @@ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mod out->informative.non_optimized_mcache_allocation[k].global_mcache_ids_plane1[n] = k; } } - - out->informative.qos.max_non_urgent_latency_us = mode_lib->soc.qos_parameters.qos_params.dcn4x.per_uclk_dpm_params[mode_lib->mp.qos_param_index].maximum_latency_when_non_urgent_uclk_cycles - / mode_lib->mp.uclk_freq_mhz * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.umc_max_latency_margin / 100.0) - + mode_lib->soc.qos_parameters.qos_params.dcn4x.mall_overhead_fclk_cycles / mode_lib->mp.FabricClock - + mode_lib->soc.qos_parameters.qos_params.dcn4x.max_round_trip_to_furthest_cs_fclk_cycles / mode_lib->mp.FabricClock - * (1 + mode_lib->soc.qos_parameters.qos_params.dcn4x.fabric_max_transport_latency_margin / 100.0); + out->informative.qos.max_non_urgent_latency_us = dml_get_max_non_urgent_latency_us(mode_lib); if (mode_lib->soc.qos_parameters.qos_type == dml2_qos_param_type_dcn4x) { if (((mode_lib->ip.rob_buffer_size_kbytes - mode_lib->ip.pixel_chunk_size_kbytes) * 1024 - / mode_lib->mp.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { + / mode_lib->ms.support.non_urg_bandwidth_required[dml2_core_internal_soc_state_sys_active][dml2_core_internal_bw_sdp]) >= out->informative.qos.max_non_urgent_latency_us) { out->informative.misc.ROBUrgencyAvoidance = true; } else { out->informative.misc.ROBUrgencyAvoidance = false; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h index df2d1550a14b..27ef0e096b25 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_dcn4_calcs.h @@ -28,7 +28,7 @@ void dml2_core_calcs_get_plane_support_info(const struct dml2_display_cfg *displ void dml2_core_calcs_get_informative(const struct dml2_core_internal_display_mode_lib *mode_lib, struct dml2_display_cfg_programming *out); void dml2_core_calcs_get_stream_support_info(const struct dml2_display_cfg *display_cfg, const struct dml2_core_internal_display_mode_lib *mode_lib, struct core_stream_support_info *out, int plane_index); void 
dml2_core_calcs_get_mall_allocation(struct dml2_core_internal_display_mode_lib *mode_lib, unsigned int *out, int pipe_index); -void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_fams2_stream_static_state *fams2_programming, enum dml2_uclk_pstate_support_method pstate_method, int plane_index); +void dml2_core_calcs_get_stream_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, union dmub_cmd_fams2_config *fams2_base_programming, union dmub_cmd_fams2_config *fams2_sub_programming, enum dml2_pstate_method pstate_method, int plane_index); void dml2_core_calcs_get_global_fams2_programming(const struct dml2_core_internal_display_mode_lib *mode_lib, const struct display_configuation_with_meta *display_cfg, struct dmub_cmd_fams2_global_config *fams2_global_config); void dml2_core_calcs_get_dpte_row_height(unsigned int *dpte_row_height, struct dml2_core_internal_display_mode_lib *mode_lib, bool is_plane1, enum dml2_source_format_class SourcePixelFormat, enum dml2_swizzle_mode SurfaceTiling, enum dml2_rotation_angle ScanDirection, unsigned int pitch, unsigned int GPUVMMinPageSizeKBytes); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h index cbdfbd5a0bde..4f54e54102ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_shared_types.h @@ -201,7 +201,7 @@ struct dml2_core_internal_watermarks { double Z8StutterExitWatermark; double Z8StutterEnterPlusExitWatermark; double USRRetrainingWatermark; - double g6_temp_read_watermark_us; + double temp_read_or_ppt_watermark_us; }; struct dml2_core_internal_mode_support_info { @@ -252,8 +252,8 @@ struct dml2_core_internal_mode_support_info { bool PTEBufferSizeNotExceeded; bool DCCMetaBufferSizeNotExceeded; - enum dml2_dram_clock_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; - enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; bool global_dram_clock_change_supported; bool global_fclk_change_supported; bool USRRetrainingSupport; @@ -318,12 +318,15 @@ struct dml2_core_internal_mode_support_info { bool avg_bandwidth_support_ok[dml2_core_internal_soc_state_max][dml2_core_internal_bw_max]; double max_urgent_latency_us; + double max_non_urgent_latency_us; double avg_non_urgent_latency_us; double avg_urgent_latency_us; + double df_response_time_us; bool incorrect_imall_usage; bool g6_temp_read_support; + bool temp_read_or_ppt_support; struct dml2_core_internal_watermarks watermarks; }; @@ -378,8 +381,8 @@ struct dml2_core_internal_mode_support { unsigned int DETBufferSizeC[DML2_MAX_PLANES]; unsigned int SwathHeightY[DML2_MAX_PLANES]; unsigned int SwathHeightC[DML2_MAX_PLANES]; - unsigned int SwathWidthY[DML2_MAX_PLANES]; - unsigned int SwathWidthC[DML2_MAX_PLANES]; + unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe + unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe // ---------------------------------- // Intermediates/Informational @@ -476,9 +479,9 @@ struct dml2_core_internal_mode_support { // Bandwidth Related Info double 
BandwidthAvailableForImmediateFlip; - double SurfaceReadBandwidthLuma[DML2_MAX_PLANES]; // no dcc overhead, for the plane - double SurfaceReadBandwidthChroma[DML2_MAX_PLANES]; - double WriteBandwidth[DML2_MAX_PLANES]; + double vactive_sw_bw_l[DML2_MAX_PLANES]; // no dcc overhead, for the plane + double vactive_sw_bw_c[DML2_MAX_PLANES]; + double WriteBandwidth[DML2_MAX_PLANES][DML2_MAX_WRITEBACK]; double RequiredPrefetchPixelDataBWLuma[DML2_MAX_PLANES]; double RequiredPrefetchPixelDataBWChroma[DML2_MAX_PLANES]; double cursor_bw[DML2_MAX_PLANES]; @@ -539,7 +542,7 @@ struct dml2_core_internal_mode_program { unsigned int qos_param_index; // to access the uclk dependent dpm table unsigned int active_min_uclk_dpm_index; // to access the min_clk table double FabricClock; /// <brief Basically just the clock freq at the min (or given) state - double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting + //double DCFCLK; /// <brief Basically just the clock freq at the min (or given) state and max combine setting double dram_bw_mbps; double uclk_freq_mhz; unsigned int NoOfDPP[DML2_MAX_PLANES]; @@ -562,14 +565,14 @@ struct dml2_core_internal_mode_program { double BytePerPixelInDETC[DML2_MAX_PLANES]; unsigned int BytePerPixelY[DML2_MAX_PLANES]; unsigned int BytePerPixelC[DML2_MAX_PLANES]; - unsigned int SwathWidthY[DML2_MAX_PLANES]; - unsigned int SwathWidthC[DML2_MAX_PLANES]; + unsigned int SwathWidthY[DML2_MAX_PLANES]; // per-pipe + unsigned int SwathWidthC[DML2_MAX_PLANES]; // per-pipe unsigned int req_per_swath_ub_l[DML2_MAX_PLANES]; unsigned int req_per_swath_ub_c[DML2_MAX_PLANES]; unsigned int SwathWidthSingleDPPY[DML2_MAX_PLANES]; unsigned int SwathWidthSingleDPPC[DML2_MAX_PLANES]; - double SurfaceReadBandwidthLuma[DML2_MAX_PLANES]; - double SurfaceReadBandwidthChroma[DML2_MAX_PLANES]; + double vactive_sw_bw_l[DML2_MAX_PLANES]; + double vactive_sw_bw_c[DML2_MAX_PLANES]; double excess_vactive_fill_bw_l[DML2_MAX_PLANES]; double excess_vactive_fill_bw_c[DML2_MAX_PLANES]; @@ -797,8 +800,9 @@ struct dml2_core_internal_mode_program { double MaxActiveFCLKChangeLatencySupported; bool USRRetrainingSupport; bool g6_temp_read_support; - enum dml2_fclock_change_support FCLKChangeSupport[DML2_MAX_PLANES]; - enum dml2_dram_clock_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; + bool temp_read_or_ppt_support; + enum dml2_pstate_change_support FCLKChangeSupport[DML2_MAX_PLANES]; + enum dml2_pstate_change_support DRAMClockChangeSupport[DML2_MAX_PLANES]; bool global_dram_clock_change_supported; bool global_fclk_change_supported; double MaxActiveDRAMClockChangeLatencySupported[DML2_MAX_PLANES]; @@ -846,6 +850,8 @@ struct dml2_core_internal_mode_program { bool mall_comb_mcache_l[DML2_MAX_PLANES]; bool mall_comb_mcache_c[DML2_MAX_PLANES]; bool lc_comb_mcache[DML2_MAX_PLANES]; + + double impacted_prefetch_margin_us[DML2_MAX_PLANES]; }; struct dml2_core_internal_SOCParametersList { @@ -862,6 +868,7 @@ struct dml2_core_internal_SOCParametersList { double USRRetrainingLatency; double SMNLatency; double g6_temp_read_blackout_us; + double temp_read_or_ppt_blackout_us; double max_urgent_latency_us; double df_response_time_us; enum dml2_qos_param_type qos_type; @@ -961,6 +968,17 @@ struct dml2_core_calcs_mode_support_locals { unsigned int pstate_bytes_required_l[DML2_MAX_PLANES]; unsigned int pstate_bytes_required_c[DML2_MAX_PLANES]; + + double prefetch_sw_bytes[DML2_MAX_PLANES]; + double Tpre_rounded[DML2_MAX_PLANES]; + double Tpre_oto[DML2_MAX_PLANES]; + bool 
recalc_prefetch_schedule; + bool recalc_prefetch_done; + double impacted_dst_y_pre[DML2_MAX_PLANES]; + double line_times[DML2_MAX_PLANES]; + enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; + unsigned int lb_source_lines_l[DML2_MAX_PLANES]; + unsigned int lb_source_lines_c[DML2_MAX_PLANES]; }; struct dml2_core_calcs_mode_programming_locals { @@ -1041,6 +1059,16 @@ struct dml2_core_calcs_mode_programming_locals { unsigned int pstate_bytes_required_l[DML2_MAX_PLANES]; unsigned int pstate_bytes_required_c[DML2_MAX_PLANES]; + + double prefetch_sw_bytes[DML2_MAX_PLANES]; + double Tpre_rounded[DML2_MAX_PLANES]; + double Tpre_oto[DML2_MAX_PLANES]; + bool recalc_prefetch_schedule; + double impacted_dst_y_pre[DML2_MAX_PLANES]; + double line_times[DML2_MAX_PLANES]; + enum dml2_source_format_class pixel_format[DML2_MAX_PLANES]; + unsigned int lb_source_lines_l[DML2_MAX_PLANES]; + unsigned int lb_source_lines_c[DML2_MAX_PLANES]; }; struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals { @@ -1048,6 +1076,7 @@ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_local double ActiveFCLKChangeLatencyMargin[DML2_MAX_PLANES]; double USRRetrainingLatencyMargin[DML2_MAX_PLANES]; double g6_temp_read_latency_margin[DML2_MAX_PLANES]; + double temp_read_or_ppt_latency_margin[DML2_MAX_PLANES]; double EffectiveLBLatencyHidingY; double EffectiveLBLatencyHidingC; @@ -1185,17 +1214,14 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double LineTime; double dst_y_prefetch_equ; double prefetch_bw_oto; + double per_pipe_vactive_sw_bw; double Tvm_oto; double Tr0_oto; - double Tvm_no_trip_oto; - double Tr0_no_trip_oto; double Tvm_oto_lines; double Tr0_oto_lines; double dst_y_prefetch_oto; double TimeForFetchingVM; double TimeForFetchingRowInVBlank; - double dst_y_per_vm_no_trip_vblank; - double dst_y_per_row_no_trip_vblank; double LinesToRequestPrefetchPixelData; unsigned int HostVMDynamicLevelsTrips; double trip_to_mem; @@ -1203,15 +1229,12 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double Tr0_trips_rounded; double max_Tsw; double Lsw_oto; - double Lsw_equ; - double Tpre_rounded; double prefetch_bw_equ; double Tvm_equ; double Tr0_equ; double Tdmbf; double Tdmec; double Tdmsks; - double prefetch_sw_bytes; double total_row_bytes; double prefetch_bw_pr; double bytes_pp; @@ -1225,6 +1248,7 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_locals { double prefetch_bw2; double prefetch_bw3; double prefetch_bw4; + double dst_y_prefetch_equ_impacted; double TWait_p; unsigned int cursor_prefetch_bytes; @@ -1545,17 +1569,18 @@ struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_param // Output struct dml2_core_internal_watermarks *Watermark; - enum dml2_dram_clock_change_support *DRAMClockChangeSupport; + enum dml2_pstate_change_support *DRAMClockChangeSupport; bool *global_dram_clock_change_supported; double *MaxActiveDRAMClockChangeLatencySupported; unsigned int *SubViewportLinesNeededInMALL; - enum dml2_fclock_change_support *FCLKChangeSupport; + enum dml2_pstate_change_support *FCLKChangeSupport; bool *global_fclk_change_supported; double *MaxActiveFCLKChangeLatencySupported; bool *USRRetrainingSupport; double *VActiveLatencyHidingMargin; double *VActiveLatencyHidingUs; bool *g6_temp_read_support; + bool *temp_read_or_ppt_support; }; @@ -1727,8 +1752,8 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { double PrefetchSourceLinesC; unsigned int VInitPreFillC; unsigned int MaxNumSwathC; - unsigned int 
swath_width_luma_ub; - unsigned int swath_width_chroma_ub; + unsigned int swath_width_luma_ub; // per-pipe + unsigned int swath_width_chroma_ub; // per-pipe unsigned int SwathHeightY; unsigned int SwathHeightC; double TWait; @@ -1750,6 +1775,10 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { unsigned int meta_row_bytes; double mall_prefetch_sdp_overhead_factor; + double impacted_dst_y_pre; + double vactive_sw_bw_l; // per surface bw + double vactive_sw_bw_c; // per surface bw + // output unsigned int *DSTXAfterScaler; unsigned int *DSTYAfterScaler; @@ -1767,6 +1796,8 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { double *Tdmdl_vm; double *Tdmdl; double *TSetup; + double *Tpre_rounded; + double *Tpre_oto; double *Tvm_trips; double *Tr0_trips; double *Tvm_trips_flip; @@ -1777,6 +1808,47 @@ struct dml2_core_calcs_CalculatePrefetchSchedule_params { unsigned int *VUpdateWidthPix; unsigned int *VReadyOffsetPix; double *prefetch_cursor_bw; + double *prefetch_sw_bytes; +}; + +struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params { + unsigned int num_active_planes; + enum dml2_source_format_class *pixel_format; + unsigned int rob_buffer_size_kbytes; + unsigned int compressed_buffer_size_kbytes; + unsigned int chunk_bytes_l; // same for all planes + unsigned int chunk_bytes_c; + unsigned int *detile_buffer_size_bytes_l; + unsigned int *detile_buffer_size_bytes_c; + unsigned int *full_swath_bytes_l; + unsigned int *full_swath_bytes_c; + unsigned int *lb_source_lines_l; + unsigned int *lb_source_lines_c; + unsigned int *swath_height_l; + unsigned int *swath_height_c; + double *prefetch_sw_bytes; + double *Tpre_rounded; + double *Tpre_oto; + double estimated_dcfclk_mhz; + double estimated_urg_bandwidth_required_mbps; + double *line_time; + double *dst_y_prefetch; + + // output + bool *recalc_prefetch_schedule; + double *impacted_dst_y_pre; +}; + +struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals { + unsigned int max_Trpd_dcfclk_cycles; + unsigned int burst_bytes_to_fill_det; + double time_to_fill_det_us; + unsigned int accumulated_return_path_dcfclk_cycles[DML2_MAX_PLANES]; + bool prefetch_global_check_passed; + unsigned int src_swath_bytes_l[DML2_MAX_PLANES]; + unsigned int src_swath_bytes_c[DML2_MAX_PLANES]; + unsigned int src_detile_buf_size_bytes_l[DML2_MAX_PLANES]; + unsigned int src_detile_buf_size_bytes_c[DML2_MAX_PLANES]; }; struct dml2_core_calcs_calculate_mcache_row_bytes_params { @@ -2004,6 +2076,7 @@ struct dml2_core_internal_scratch { struct dml2_core_calcs_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_locals; struct dml2_core_calcs_CalculateVMRowAndSwath_locals CalculateVMRowAndSwath_locals; struct dml2_core_calcs_CalculatePrefetchSchedule_locals CalculatePrefetchSchedule_locals; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_locals CheckGlobalPrefetchAdmissibility_locals; struct dml2_core_shared_CalculateSwathAndDETConfiguration_locals CalculateSwathAndDETConfiguration_locals; struct dml2_core_shared_TruncToValidBPP_locals TruncToValidBPP_locals; struct dml2_core_shared_CalculateDETBufferSize_locals CalculateDETBufferSize_locals; @@ -2019,6 +2092,7 @@ struct dml2_core_internal_scratch { struct dml2_core_calcs_CalculateSwathAndDETConfiguration_params CalculateSwathAndDETConfiguration_params; struct dml2_core_calcs_CalculateStutterEfficiency_params CalculateStutterEfficiency_params; struct dml2_core_calcs_CalculatePrefetchSchedule_params 
CalculatePrefetchSchedule_params; + struct dml2_core_calcs_CheckGlobalPrefetchAdmissibility_params CheckGlobalPrefetchAdmissibility_params; struct dml2_core_calcs_calculate_mcache_setting_params calculate_mcache_setting_params; struct dml2_core_calcs_calculate_tdlut_setting_params calculate_tdlut_setting_params; struct dml2_core_shared_calculate_vm_and_row_bytes_params calculate_vm_and_row_bytes_params; @@ -2038,7 +2112,6 @@ struct dml2_core_internal_display_mode_lib { // Used to hold input; intermediate and output of the calculations struct dml2_core_internal_mode_support ms; // struct for mode support struct dml2_core_internal_mode_program mp; // struct for mode programming - // Available overridable calculators for core_shared. // if null, core_shared will use default calculators. struct dml2_core_shared_calculation_funcs funcs; @@ -2051,7 +2124,6 @@ struct dml2_core_calcs_mode_support_ex { const struct dml2_display_cfg *in_display_cfg; const struct dml2_mcg_min_clock_table *min_clk_table; int min_clk_index; - //unsigned int in_state_index; struct dml2_core_internal_mode_support_info *out_evaluation_info; }; @@ -2064,7 +2136,6 @@ struct dml2_core_calcs_mode_programming_ex { const struct dml2_mcg_min_clock_table *min_clk_table; const struct core_display_cfg_support_info *cfg_support_info; int min_clk_index; - struct dml2_display_cfg_programming *programming; }; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c index 714b5c39b7e6..1548dfc68b8e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.c @@ -63,6 +63,150 @@ bool dml2_core_utils_is_420(enum dml2_source_format_class source_format) case dml2_mono_16: val = 0; break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 0; + break; + case dml2_420_10: + val = 0; + break; + case dml2_420_12: + val = 0; + break; + case dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 1; + break; + case dml2_422_planar_10: + val = 1; + break; + case dml2_422_planar_12: + val = 1; + break; + case dml2_422_packed_8: + val = 0; + break; + case dml2_422_packed_10: + val = 0; + break; + case dml2_422_packed_12: + val = 0; + break; + default: + DML2_ASSERT(0); + break; + } + return val; +} + +bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format) +{ + bool val = false; + + switch (source_format) { + case dml2_444_8: + val = 0; + break; + case dml2_444_16: + val = 0; + break; + case dml2_444_32: + val = 0; + break; + case dml2_444_64: + val = 0; + break; + case dml2_420_8: + val = 0; + break; + case dml2_420_10: + val = 0; + break; + case dml2_420_12: + val = 0; + break; + case 
dml2_rgbe_alpha: + val = 0; + break; + case dml2_rgbe: + val = 0; + break; + case dml2_mono_8: + val = 0; + break; + case dml2_mono_16: + val = 0; + break; + case dml2_422_planar_8: + val = 0; + break; + case dml2_422_planar_10: + val = 0; + break; + case dml2_422_planar_12: + val = 0; + break; + case dml2_422_packed_8: + val = 1; + break; + case dml2_422_packed_10: + val = 1; + break; + case dml2_422_packed_12: + val = 1; + break; default: DML2_ASSERT(0); break; @@ -154,9 +298,9 @@ void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mod dml2_printf("DML: support: DynamicMetadataSupported = %d\n", support->DynamicMetadataSupported); if (!fail_only || support->VRatioInPrefetchSupported == 0) dml2_printf("DML: support: VRatioInPrefetchSupported = %d\n", support->VRatioInPrefetchSupported); - if (!fail_only || support->PTEBufferSizeNotExceeded == 1) + if (!fail_only || support->PTEBufferSizeNotExceeded == 0) dml2_printf("DML: support: PTEBufferSizeNotExceeded = %d\n", support->PTEBufferSizeNotExceeded); - if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 1) + if (!fail_only || support->DCCMetaBufferSizeNotExceeded == 0) dml2_printf("DML: support: DCCMetaBufferSizeNotExceeded = %d\n", support->DCCMetaBufferSizeNotExceeded); if (!fail_only || support->ExceededMALLSize == 1) dml2_printf("DML: support: ExceededMALLSize = %d\n", support->ExceededMALLSize); @@ -280,39 +424,49 @@ bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_c return is_phantom; } -unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode) -{ - switch (sw_mode) { - case (dml2_sw_linear): - return 256; break; - case (dml2_sw_256b_2d): - return 256; break; - case (dml2_sw_4kb_2d): - return 4096; break; - case (dml2_sw_64kb_2d): - return 65536; break; - case (dml2_sw_256kb_2d): - return 262144; break; - case (dml2_gfx11_sw_linear): - return 256; break; - case (dml2_gfx11_sw_64kb_d): - return 65536; break; - case (dml2_gfx11_sw_64kb_d_t): - return 65536; break; - case (dml2_gfx11_sw_64kb_d_x): - return 65536; break; - case (dml2_gfx11_sw_64kb_r_x): - return 65536; break; - case (dml2_gfx11_sw_256kb_d_x): - return 262144; break; - case (dml2_gfx11_sw_256kb_r_x): - return 262144; break; - default: +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) +{ + + if (sw_mode == dml2_sw_linear) + return 256; + else if (sw_mode == dml2_sw_256b_2d) + return 256; + else if (sw_mode == dml2_sw_4kb_2d) + return 4096; + else if (sw_mode == dml2_sw_64kb_2d) + return 65536; + else if (sw_mode == dml2_sw_256kb_2d) + return 262144; + else if (sw_mode == dml2_gfx11_sw_linear) + return 256; + else if (sw_mode == dml2_gfx11_sw_64kb_d) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_t) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_d_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_64kb_r_x) + return 65536; + else if (sw_mode == dml2_gfx11_sw_256kb_d_x) + return 262144; + else if (sw_mode == dml2_gfx11_sw_256kb_r_x) + return 262144; + else { DML2_ASSERT(0); return 256; }; } +bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel) +{ + return (byte_per_pixel != 2); +} + +bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode) +{ + return (sw_mode == dml2_sw_linear || sw_mode == dml2_sw_linear_256b || sw_mode == dml2_linear_64elements); +}; + bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) 
{ @@ -325,7 +479,6 @@ bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan) return is_vert; } - int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) { int unsigned version = 0; @@ -334,17 +487,17 @@ int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode) sw_mode == dml2_sw_256b_2d || sw_mode == dml2_sw_4kb_2d || sw_mode == dml2_sw_64kb_2d || - sw_mode == dml2_sw_256kb_2d) { + sw_mode == dml2_sw_256kb_2d) version = 12; - } else if (sw_mode == dml2_gfx11_sw_linear || + else if (sw_mode == dml2_gfx11_sw_linear || sw_mode == dml2_gfx11_sw_64kb_d || sw_mode == dml2_gfx11_sw_64kb_d_t || sw_mode == dml2_gfx11_sw_64kb_d_x || sw_mode == dml2_gfx11_sw_64kb_r_x || sw_mode == dml2_gfx11_sw_256kb_d_x || - sw_mode == dml2_gfx11_sw_256kb_r_x) { + sw_mode == dml2_gfx11_sw_256kb_r_x) version = 11; - } else { + else { dml2_printf("ERROR: Invalid sw_mode setting! val=%u\n", sw_mode); DML2_ASSERT(0); } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h index a5cc6a07167a..95f0d017add4 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_core/dml2_core_utils.h @@ -11,6 +11,8 @@ double dml2_core_utils_div_rem(double dividend, unsigned int divisor, unsigned int *remainder); const char *dml2_core_utils_internal_bw_type_str(enum dml2_core_internal_bw_type bw_type); bool dml2_core_utils_is_420(enum dml2_source_format_class source_format); +bool dml2_core_utils_is_422_planar(enum dml2_source_format_class source_format); +bool dml2_core_utils_is_422_packed(enum dml2_source_format_class source_format); void dml2_core_utils_print_mode_support_info(const struct dml2_core_internal_mode_support_info *support, bool fail_only); const char *dml2_core_utils_internal_soc_state_type_str(enum dml2_core_internal_soc_state_type dml2_core_internal_soc_state_type); void dml2_core_utils_get_stream_output_bpp(double *out_bpp, const struct dml2_display_cfg *display_cfg); @@ -18,8 +20,10 @@ unsigned int dml2_core_utils_round_to_multiple(unsigned int num, unsigned int mu unsigned int dml2_core_util_get_num_active_pipes(int unsigned num_planes, const struct core_display_cfg_support_info *cfg_support_info); void dml2_core_utils_pipe_plane_mapping(const struct core_display_cfg_support_info *cfg_support_info, unsigned int *pipe_plane); bool dml2_core_utils_is_phantom_pipe(const struct dml2_plane_parameters *plane_cfg); -unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode); +unsigned int dml2_core_utils_get_tile_block_size_bytes(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); +bool dml2_core_utils_get_segment_horizontal_contiguous(enum dml2_swizzle_mode sw_mode, unsigned int byte_per_pixel); bool dml2_core_utils_is_vertical_rotation(enum dml2_rotation_angle Scan); +bool dml2_core_utils_is_linear(enum dml2_swizzle_mode sw_mode); int unsigned dml2_core_utils_get_gfx_version(enum dml2_swizzle_mode sw_mode); unsigned int dml2_core_utils_get_qos_param_index(unsigned long uclk_freq_khz, const struct dml2_dcn4_uclk_dpm_dependent_qos_params *per_uclk_dpm_params); unsigned int dml2_core_utils_get_active_min_uclk_dpm_index(unsigned long uclk_freq_khz, const struct dml2_soc_state_table *clk_table); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c 
b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c index 8869ea089312..8a78b9adfc62 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_dpmm/dml2_dpmm_dcn4.c @@ -96,6 +96,7 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm double min_uclk_latency; const struct dml2_core_mode_support_result *mode_support_result = &in_out->display_cfg->mode_support_result; + /* assumes DF throttling is enabled */ min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.dcn_mall_prefetch_average.dram_derate_percent_pixel / 100); @@ -125,6 +126,37 @@ static void calculate_svp_prefetch_minimums(struct dml2_dpmm_map_mode_to_soc_dpm in_out->programming->min_clocks.dcn4x.svp_prefetch.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); in_out->programming->min_clocks.dcn4x.svp_prefetch.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); in_out->programming->min_clocks.dcn4x.svp_prefetch.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); + + /* assumes DF throttling is disabled */ + min_uclk_avg = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.average_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_avg = (double)min_uclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dram_derate_percent_pixel / 100); + + min_uclk_urgent = dram_bw_kbps_to_uclk_khz(mode_support_result->global.svp_prefetch.urgent_bw_dram_kbps, &in_out->soc_bb->clk_table.dram_config); + min_uclk_urgent = (double)min_uclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dram_derate_percent_pixel / 100); + + min_uclk_bw = min_uclk_urgent > min_uclk_avg ? min_uclk_urgent : min_uclk_avg; + + min_fclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_avg = (double)min_fclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.fclk_derate_percent / 100); + + min_fclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->fabric_datapath_to_dcn_data_return_bytes; + min_fclk_urgent = (double)min_fclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.fclk_derate_percent / 100); + + min_fclk_bw = min_fclk_urgent > min_fclk_avg ? min_fclk_urgent : min_fclk_avg; + + min_dcfclk_avg = (double)mode_support_result->global.svp_prefetch.average_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_avg = (double)min_dcfclk_avg / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_average.dcfclk_derate_percent / 100); + + min_dcfclk_urgent = (double)mode_support_result->global.svp_prefetch.urgent_bw_sdp_kbps / in_out->soc_bb->return_bus_width_bytes; + min_dcfclk_urgent = (double)min_dcfclk_urgent / ((double)in_out->soc_bb->qos_parameters.derate_table.system_active_urgent.dcfclk_derate_percent / 100); + + min_dcfclk_bw = min_dcfclk_urgent > min_dcfclk_avg ? 
min_dcfclk_urgent : min_dcfclk_avg; + + get_minimum_clocks_for_latency(in_out, &min_uclk_latency, &min_fclk_latency, &min_dcfclk_latency); + + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = dml_round_up(min_uclk_bw > min_uclk_latency ? min_uclk_bw : min_uclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = dml_round_up(min_fclk_bw > min_fclk_latency ? min_fclk_bw : min_fclk_latency); + in_out->programming->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = dml_round_up(min_dcfclk_bw > min_dcfclk_latency ? min_dcfclk_bw : min_dcfclk_latency); } static void calculate_idle_minimums(struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out *in_out) @@ -180,7 +212,7 @@ static bool add_margin_and_round_to_dfs_grainularity(double clock_khz, double ma clock_khz *= 1.0 + margin; - divider = (unsigned int)((int)DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); + divider = (unsigned int)(DFS_DIVIDER_RANGE_SCALE_FACTOR * (vco_freq_khz / clock_khz)); /* we want to floor here to get higher clock than required rather than lower */ if (divider < DFS_DIVIDER_RANGE_2_START) { @@ -272,6 +304,17 @@ static bool map_soc_min_clocks_to_dpm_fine_grained(struct dml2_display_cfg_progr if (result) result = round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.idle.uclk_khz, &state_table->uclk); + /* these clocks are optional, so they can fail to map, in which case map all to 0 */ + if (result) { + if (!round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz, &state_table->dcfclk) || + !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz, &state_table->fclk) || + !round_up_to_next_dpm(&display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz, &state_table->uclk)) { + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.dcfclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.fclk_khz = 0; + display_cfg->min_clocks.dcn4x.svp_prefetch_no_throttle.uclk_khz = 0; + } + } + return result; } @@ -711,7 +754,7 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].fclk_pstate = (int unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_A].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); @@ -725,7 +768,7 @@ bool dpmm_dcn4_map_watermarks(struct dml2_dpmm_map_watermarks_params_in_out *in_ dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].fclk_pstate = (int 
unsigned)(mode_lib->mp.Watermark.FCLKChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_enter = (int unsigned)(mode_lib->mp.Watermark.StutterEnterPlusExitWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].sr_exit = (int unsigned)(mode_lib->mp.Watermark.StutterExitWatermark * refclk_freq_in_mhz); - dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.g6_temp_read_watermark_us * refclk_freq_in_mhz); + dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].temp_read_or_ppt = (int unsigned)(mode_lib->mp.Watermark.temp_read_or_ppt_watermark_us * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].uclk_pstate = (int unsigned)(mode_lib->mp.Watermark.DRAMClockChangeWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].urgent = (int unsigned)(mode_lib->mp.Watermark.UrgentWatermark * refclk_freq_in_mhz); dchubbub_regs->wm_regs[DML2_DCHUB_WATERMARK_SET_B].usr = (int unsigned)(mode_lib->mp.Watermark.USRRetrainingWatermark * refclk_freq_in_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c index 92269f0e50ed..1efbc0329f85 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.c @@ -13,32 +13,32 @@ static const double MIN_BLANK_STUTTER_FACTOR = 3.0; static const struct dml2_pmo_pstate_strategy base_strategy_list_1_display[] = { // VActive Preferred { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then SVP { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = false, }, // Then DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Finally VBlank, but allow base clocks for latency to increase /* { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, */ @@ -49,56 +49,56 @@ static const int base_strategy_list_1_display_size = sizeof(base_strategy_list_1 static const struct dml2_pmo_pstate_strategy 
base_strategy_list_2_display[] = { // VActive only is preferred { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then VActive + VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = false, }, // Then VBlank only { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = false, }, // Then SVP + VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = false, }, // Then SVP + DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then SVP + SVP { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_fw_svp, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_svp, dml2_pstate_method_fw_svp, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then DRR + VActive { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Then DRR + DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, // Finally VBlank, but allow base clocks for latency to increase /* { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na, dml2_pstate_method_na }, .allow_state_increase = true, }, */ @@ -109,32 +109,32 @@ static const int base_strategy_list_2_display_size = sizeof(base_strategy_list_2 static const struct dml2_pmo_pstate_strategy base_strategy_list_3_display[] = { // All VActive { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, 
dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_na }, .allow_state_increase = true, }, // VActive + 1 VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank, dml2_pstate_method_na }, .allow_state_increase = false, }, // All VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, .allow_state_increase = false, }, // All DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_na }, .allow_state_increase = true, }, // All VBlank, with state increase allowed /* { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_na }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_na }, .allow_state_increase = true, }, */ @@ -145,32 +145,32 @@ static const int base_strategy_list_3_display_size = sizeof(base_strategy_list_3 static const struct dml2_pmo_pstate_strategy base_strategy_list_4_display[] = { // All VActive { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive }, .allow_state_increase = true, }, // VActive + 1 VBlank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vactive, dml2_pmo_pstate_strategy_vblank }, + .per_stream_pstate_method = { dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vactive, dml2_pstate_method_vblank }, .allow_state_increase = false, }, // All Vblank { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + .per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, .allow_state_increase = false, }, // All DRR { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr, dml2_pmo_pstate_strategy_fw_drr }, + .per_stream_pstate_method = { dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr, dml2_pstate_method_fw_drr }, .allow_state_increase = true, }, // All VBlank, with state increase allowed /* { - .per_stream_pstate_method = { dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank, dml2_pmo_pstate_strategy_vblank }, + 
.per_stream_pstate_method = { dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank, dml2_pstate_method_vblank }, .allow_state_increase = true, }, */ @@ -355,29 +355,30 @@ bool pmo_dcn4_fams2_optimize_dcc_mcache(struct dml2_pmo_optimize_dcc_mcache_in_o return result; } -static enum dml2_pmo_pstate_method convert_strategy_to_drr_variant(const enum dml2_pmo_pstate_method base_strategy) +static enum dml2_pstate_method convert_strategy_to_drr_variant(const enum dml2_pstate_method base_strategy) { - enum dml2_pmo_pstate_method variant_strategy = 0; + enum dml2_pstate_method variant_strategy = 0; switch (base_strategy) { - case dml2_pmo_pstate_strategy_vactive: - variant_strategy = dml2_pmo_pstate_strategy_fw_vactive_drr; + case dml2_pstate_method_vactive: + variant_strategy = dml2_pstate_method_fw_vactive_drr; break; - case dml2_pmo_pstate_strategy_vblank: - variant_strategy = dml2_pmo_pstate_strategy_fw_vblank_drr; + case dml2_pstate_method_vblank: + variant_strategy = dml2_pstate_method_fw_vblank_drr; break; - case dml2_pmo_pstate_strategy_fw_svp: - variant_strategy = dml2_pmo_pstate_strategy_fw_svp_drr; + case dml2_pstate_method_fw_svp: + variant_strategy = dml2_pstate_method_fw_svp_drr; break; - case dml2_pmo_pstate_strategy_fw_vactive_drr: - case dml2_pmo_pstate_strategy_fw_vblank_drr: - case dml2_pmo_pstate_strategy_fw_svp_drr: - case dml2_pmo_pstate_strategy_fw_drr: - case dml2_pmo_pstate_strategy_reserved_hw: - case dml2_pmo_pstate_strategy_reserved_fw: - case dml2_pmo_pstate_strategy_reserved_fw_drr_clamped: - case dml2_pmo_pstate_strategy_reserved_fw_drr_var: - case dml2_pmo_pstate_strategy_na: + case dml2_pstate_method_fw_vactive_drr: + case dml2_pstate_method_fw_vblank_drr: + case dml2_pstate_method_fw_svp_drr: + case dml2_pstate_method_fw_drr: + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: default: /* no variant for this mode */ variant_strategy = base_strategy; @@ -419,23 +420,22 @@ static unsigned int get_num_expanded_strategies( static void insert_strategy_into_expanded_list( const struct dml2_pmo_pstate_strategy *per_stream_pstate_strategy, - int stream_count, - struct dml2_pmo_init_data *init_data) + const int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) { - struct dml2_pmo_pstate_strategy *expanded_strategy_list = NULL; - - expanded_strategy_list = get_expanded_strategy_list(init_data, stream_count); - - if (expanded_strategy_list) { - memcpy(&expanded_strategy_list[init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); + if (expanded_strategy_list && num_expanded_strategies) { + memcpy(&expanded_strategy_list[*num_expanded_strategies], per_stream_pstate_strategy, sizeof(struct dml2_pmo_pstate_strategy)); - init_data->pmo_dcn4.num_expanded_strategies_per_list[stream_count - 1]++; + (*num_expanded_strategies)++; } } -static void expand_base_strategy(struct dml2_pmo_instance *pmo, +static void expand_base_strategy( const struct dml2_pmo_pstate_strategy *base_strategy, - unsigned int stream_count) + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) { bool skip_to_next_stream; bool expanded_strategy_added; @@ -473,7 +473,7 @@ 
static void expand_base_strategy(struct dml2_pmo_instance *pmo, if (i >= stream_count - 1) { /* insert into strategy list */ - insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, &pmo->init_data); + insert_strategy_into_expanded_list(&cur_strategy_list, stream_count, expanded_strategy_list, num_expanded_strategies); expanded_strategy_added = true; } else { /* skip to next stream */ @@ -512,9 +512,9 @@ static void expand_base_strategy(struct dml2_pmo_instance *pmo, static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_strategy, const struct dml2_pmo_pstate_strategy *variant_strategy, - unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], - unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], - unsigned int stream_count) + const unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS], + const unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS], + const unsigned int stream_count) { bool valid = true; unsigned int i; @@ -522,7 +522,7 @@ static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_ /* check all restrictions are met */ for (i = 0; i < stream_count; i++) { /* vblank + vblank_drr variants are invalid */ - if (base_strategy->per_stream_pstate_method[i] == dml2_pmo_pstate_strategy_vblank && + if (base_strategy->per_stream_pstate_method[i] == dml2_pstate_method_vblank && ((num_streams_per_base_method[i] > 0 && num_streams_per_variant_method[i] > 0) || num_streams_per_variant_method[i] > 1)) { valid = false; @@ -533,9 +533,12 @@ static bool is_variant_method_valid(const struct dml2_pmo_pstate_strategy *base_ return valid; } -static void expand_variant_strategy(struct dml2_pmo_instance *pmo, +static void expand_variant_strategy( const struct dml2_pmo_pstate_strategy *base_strategy, - unsigned int stream_count) + const unsigned int stream_count, + const bool should_permute, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) { bool variant_found; unsigned int i, j; @@ -544,7 +547,7 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo, unsigned int num_streams_per_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; unsigned int num_streams_per_base_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; unsigned int num_streams_per_variant_method[PMO_DCN4_MAX_DISPLAYS] = { 0 }; - enum dml2_pmo_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; + enum dml2_pstate_method per_stream_variant_method[DML2_MAX_PLANES]; struct dml2_pmo_pstate_strategy variant_strategy = { 0 }; /* determine number of displays per method */ @@ -585,7 +588,13 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo, } if (variant_found && is_variant_method_valid(base_strategy, &variant_strategy, num_streams_per_base_method, num_streams_per_variant_method, stream_count)) { - expand_base_strategy(pmo, &variant_strategy, stream_count); + if (should_permute) { + /* permutations are permitted, proceed to expand */ + expand_base_strategy(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); + } else { + /* no permutations allowed, so add to list now */ + insert_strategy_into_expanded_list(&variant_strategy, stream_count, expanded_strategy_list, num_expanded_strategies); + } } /* rollback to earliest method with bases remaining */ @@ -612,18 +621,19 @@ static void expand_variant_strategy(struct dml2_pmo_instance *pmo, } } -static void expand_base_strategies( - struct dml2_pmo_instance *pmo, - const struct dml2_pmo_pstate_strategy 
*base_strategies_list, - const unsigned int num_base_strategies, - unsigned int stream_count) +void pmo_dcn4_fams2_expand_base_pstate_strategies( + const struct dml2_pmo_pstate_strategy *base_strategies_list, + const unsigned int num_base_strategies, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies) { unsigned int i; /* expand every explicit base strategy (except all DRR) */ for (i = 0; i < num_base_strategies; i++) { - expand_base_strategy(pmo, &base_strategies_list[i], stream_count); - expand_variant_strategy(pmo, &base_strategies_list[i], stream_count); + expand_base_strategy(&base_strategies_list[i], stream_count, expanded_strategy_list, num_expanded_strategies); + expand_variant_strategy(&base_strategies_list[i], stream_count, true, expanded_strategy_list, num_expanded_strategies); } } @@ -652,25 +662,45 @@ bool pmo_dcn4_fams2_initialize(struct dml2_pmo_initialize_in_out *in_out) DML2_ASSERT(base_strategy_list_1_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ - expand_base_strategies(pmo, base_strategy_list_1_display, base_strategy_list_1_display_size, 1); + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_strategy_list_1_display, + base_strategy_list_1_display_size, + i, + pmo->init_data.pmo_dcn4.expanded_strategy_list_1_display, + &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 2: DML2_ASSERT(base_strategy_list_2_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ - expand_base_strategies(pmo, base_strategy_list_2_display, base_strategy_list_2_display_size, 2); + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_strategy_list_2_display, + base_strategy_list_2_display_size, + i, + pmo->init_data.pmo_dcn4.expanded_strategy_list_2_display, + &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 3: DML2_ASSERT(base_strategy_list_3_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ - expand_base_strategies(pmo, base_strategy_list_3_display, base_strategy_list_3_display_size, 3); + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_strategy_list_3_display, + base_strategy_list_3_display_size, + i, + pmo->init_data.pmo_dcn4.expanded_strategy_list_3_display, + &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; case 4: DML2_ASSERT(base_strategy_list_4_display_size <= PMO_DCN4_MAX_BASE_STRATEGIES); /* populate list */ - expand_base_strategies(pmo, base_strategy_list_4_display, base_strategy_list_4_display_size, 4); + pmo_dcn4_fams2_expand_base_pstate_strategies( + base_strategy_list_4_display, + base_strategy_list_4_display_size, + i, + pmo->init_data.pmo_dcn4.expanded_strategy_list_4_display, + &pmo->init_data.pmo_dcn4.num_expanded_strategies_per_list[i - 1]); break; } } @@ -941,11 +971,8 @@ static void build_synchronized_timing_groups( /* find synchronizable timing groups */ for (j = i + 1; j < display_config->display_config.num_streams; j++) { if (memcmp(master_timing, - &display_config->display_config.stream_descriptors[j].timing, - sizeof(struct dml2_timing_cfg)) == 0 && - display_config->display_config.stream_descriptors[i].output.output_encoder == display_config->display_config.stream_descriptors[j].output.output_encoder && - (display_config->display_config.stream_descriptors[i].output.output_encoder != dml2_hdmi || //hdmi requires formats match - display_config->display_config.stream_descriptors[i].output.output_format == 
display_config->display_config.stream_descriptors[j].output.output_format)) { + &display_config->display_config.stream_descriptors[j].timing, + sizeof(struct dml2_timing_cfg)) == 0) { set_bit_in_bitfield(&pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], j); set_bit_in_bitfield(&stream_mapped_mask, j); } @@ -1106,24 +1133,73 @@ static void insert_into_candidate_list(const struct dml2_pmo_pstate_strategy *ps scratch->pmo_dcn4.num_pstate_candidates++; } -static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pmo_pstate_method method) +static enum dml2_pstate_method uclk_pstate_strategy_override_to_pstate_method(const enum dml2_uclk_pstate_change_strategy override_strategy) { - unsigned char i; - enum dml2_uclk_pstate_change_strategy matching_strategy = (enum dml2_uclk_pstate_change_strategy) dml2_pmo_pstate_strategy_na; + enum dml2_pstate_method method = dml2_pstate_method_na; + + switch (override_strategy) { + case dml2_uclk_pstate_change_strategy_force_vactive: + method = dml2_pstate_method_vactive; + break; + case dml2_uclk_pstate_change_strategy_force_vblank: + method = dml2_pstate_method_vblank; + break; + case dml2_uclk_pstate_change_strategy_force_drr: + method = dml2_pstate_method_fw_drr; + break; + case dml2_uclk_pstate_change_strategy_force_mall_svp: + method = dml2_pstate_method_fw_svp; + break; + case dml2_uclk_pstate_change_strategy_force_mall_full_frame: + case dml2_uclk_pstate_change_strategy_auto: + default: + method = dml2_pstate_method_na; + } - if (method == dml2_pmo_pstate_strategy_vactive || method == dml2_pmo_pstate_strategy_fw_vactive_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vactive; - else if (method == dml2_pmo_pstate_strategy_vblank || method == dml2_pmo_pstate_strategy_fw_vblank_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_vblank; - else if (method == dml2_pmo_pstate_strategy_fw_svp) - matching_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; - else if (method == dml2_pmo_pstate_strategy_fw_drr) - matching_strategy = dml2_uclk_pstate_change_strategy_force_drr; + return method; +} + +static enum dml2_uclk_pstate_change_strategy pstate_method_to_uclk_pstate_strategy_override(const enum dml2_pstate_method method) +{ + enum dml2_uclk_pstate_change_strategy override_strategy = dml2_uclk_pstate_change_strategy_auto; + + switch (method) { + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_vactive; + break; + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_vblank; + break; + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_mall_svp; + break; + case dml2_pstate_method_fw_drr: + override_strategy = dml2_uclk_pstate_change_strategy_force_drr; + break; + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: + default: + override_strategy = dml2_uclk_pstate_change_strategy_auto; + } + + return override_strategy; +} + +static bool all_planes_match_method(const struct display_configuation_with_meta *display_cfg, int plane_mask, enum dml2_pstate_method method) +{ + unsigned char i; for (i = 0; i < 
DML2_MAX_PLANES; i++) { if (is_bit_set_in_bitfield(plane_mask, i)) { if (display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto && - display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != matching_strategy) + display_cfg->display_config.plane_descriptors[i].overrides.uclk_pstate_change_strategy != pstate_method_to_uclk_pstate_strategy_override(method)) return false; } } @@ -1149,32 +1225,33 @@ static void build_method_scheduling_params( static struct dml2_fams2_per_method_common_meta *get_per_method_common_meta( struct dml2_pmo_instance *pmo, - enum dml2_pmo_pstate_method stream_pstate_method, + enum dml2_pstate_method stream_pstate_method, int stream_idx) { struct dml2_fams2_per_method_common_meta *stream_method_fams2_meta = NULL; switch (stream_pstate_method) { - case dml2_pmo_pstate_strategy_vactive: - case dml2_pmo_pstate_strategy_fw_vactive_drr: + case dml2_pstate_method_vactive: + case dml2_pstate_method_fw_vactive_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vactive.common; break; - case dml2_pmo_pstate_strategy_vblank: - case dml2_pmo_pstate_strategy_fw_vblank_drr: + case dml2_pstate_method_vblank: + case dml2_pstate_method_fw_vblank_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_vblank.common; break; - case dml2_pmo_pstate_strategy_fw_svp: - case dml2_pmo_pstate_strategy_fw_svp_drr: + case dml2_pstate_method_fw_svp: + case dml2_pstate_method_fw_svp_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_subvp.common; break; - case dml2_pmo_pstate_strategy_fw_drr: + case dml2_pstate_method_fw_drr: stream_method_fams2_meta = &pmo->scratch.pmo_dcn4.stream_fams2_meta[stream_idx].method_drr.common; break; - case dml2_pmo_pstate_strategy_reserved_hw: - case dml2_pmo_pstate_strategy_reserved_fw: - case dml2_pmo_pstate_strategy_reserved_fw_drr_clamped: - case dml2_pmo_pstate_strategy_reserved_fw_drr_var: - case dml2_pmo_pstate_strategy_na: + case dml2_pstate_method_reserved_hw: + case dml2_pstate_method_reserved_fw: + case dml2_pstate_method_reserved_fw_drr_clamped: + case dml2_pstate_method_reserved_fw_drr_var: + case dml2_pstate_method_count: + case dml2_pstate_method_na: default: stream_method_fams2_meta = NULL; } @@ -1215,7 +1292,7 @@ static bool is_timing_group_schedulable( if (is_bit_set_in_bitfield(pmo->scratch.pmo_dcn4.synchronized_timing_group_masks[timing_group_idx], i)) { stream_method_fams2_meta = get_per_method_common_meta(pmo, pstate_strategy->per_stream_pstate_method[i], i); if (!stream_method_fams2_meta) - return false; + continue; if (group_fams2_meta->allow_start_otg_vline < stream_method_fams2_meta->allow_start_otg_vline) { /* set group allow start to larger otg vline */ @@ -1295,7 +1372,7 @@ static bool is_config_schedulable( if (j_disallow_us < jp1_disallow_us) { /* swap as A < B */ swap(s->pmo_dcn4.sorted_group_gtl_disallow_index[j], - s->pmo_dcn4.sorted_group_gtl_disallow_index[j+1]); + s->pmo_dcn4.sorted_group_gtl_disallow_index[j+1]); swapped = true; } } @@ -1354,7 +1431,7 @@ static bool is_config_schedulable( if (j_period_us < jp1_period_us) { /* swap as A < B */ swap(s->pmo_dcn4.sorted_group_gtl_period_index[j], - s->pmo_dcn4.sorted_group_gtl_period_index[j+1]); + s->pmo_dcn4.sorted_group_gtl_period_index[j+1]); swapped = true; } } @@ -1413,7 +1490,7 @@ static bool is_config_schedulable( static bool 
stream_matches_drr_policy(struct dml2_pmo_instance *pmo, const struct display_configuation_with_meta *display_cfg, - const enum dml2_pmo_pstate_method stream_pstate_method, + const enum dml2_pstate_method stream_pstate_method, unsigned int stream_index) { const struct dml2_stream_parameters *stream_descriptor = &display_cfg->display_config.stream_descriptors[stream_index]; @@ -1494,19 +1571,19 @@ static bool validate_pstate_support_strategy_cofunctionality(struct dml2_pmo_ins strategy_matches_drr_requirements &= stream_matches_drr_policy(pmo, display_cfg, pstate_strategy->per_stream_pstate_method[stream_index], stream_index); - if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { svp_count++; set_bit_in_bitfield(&svp_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { drr_count++; set_bit_in_bitfield(&drr_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { vactive_count++; set_bit_in_bitfield(&vactive_stream_mask, stream_index); - } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || - pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || + pstate_strategy->per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { vblank_count++; set_bit_in_bitfield(&vblank_stream_mask, stream_index); } @@ -1625,7 +1702,7 @@ static void build_fams2_meta_per_stream(struct dml2_pmo_instance *pmo, /* for single stream, guarantee at least an instant of allow */ stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = (unsigned int)math_floor( math_max2(0.0, - timing->v_active - stream_fams2_meta->min_allow_width_otg_vlines - stream_fams2_meta->dram_clk_change_blackout_otg_vlines)); + timing->v_active - math_max2(1.0, stream_fams2_meta->min_allow_width_otg_vlines) - stream_fams2_meta->dram_clk_change_blackout_otg_vlines)); } else { /* for multi stream, bound to a max fill time defined by IP caps */ stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_otg_vlines = @@ -1738,8 +1815,10 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp struct display_configuation_with_meta *display_config; const struct dml2_plane_parameters *plane_descriptor; const struct dml2_pmo_pstate_strategy *strategy_list = NULL; + struct dml2_pmo_pstate_strategy override_base_strategy = { 0 }; unsigned int strategy_list_size = 0; unsigned char plane_index, stream_index, i; + bool build_override_strategy = true; state->performed = true; 
in_out->base_display_config->stage3.min_clk_index_for_latency = in_out->base_display_config->stage1.min_clk_index_for_latency; @@ -1763,7 +1842,11 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp set_bit_in_bitfield(&s->pmo_dcn4.stream_plane_mask[plane_descriptor->stream_index], plane_index); - state->pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; + state->pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; + + build_override_strategy &= plane_descriptor->overrides.uclk_pstate_change_strategy != dml2_uclk_pstate_change_strategy_auto; + override_base_strategy.per_stream_pstate_method[plane_descriptor->stream_index] = + uclk_pstate_strategy_override_to_pstate_method(plane_descriptor->overrides.uclk_pstate_change_strategy); } // Figure out which streams can do vactive, and also build up implicit SVP and FAMS2 meta @@ -1781,13 +1864,30 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp /* get synchronized timing groups */ build_synchronized_timing_groups(pmo, display_config); - strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); - if (!strategy_list) - return false; - - strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); + if (build_override_strategy) { + /* build expanded override strategy list (no permutations) */ + override_base_strategy.allow_state_increase = true; + s->pmo_dcn4.num_expanded_override_strategies = 0; + insert_strategy_into_expanded_list(&override_base_strategy, + display_config->display_config.num_streams, + s->pmo_dcn4.expanded_override_strategy_list, + &s->pmo_dcn4.num_expanded_override_strategies); + expand_variant_strategy(&override_base_strategy, + display_config->display_config.num_streams, + false, + s->pmo_dcn4.expanded_override_strategy_list, + &s->pmo_dcn4.num_expanded_override_strategies); + + /* use override strategy list */ + strategy_list = s->pmo_dcn4.expanded_override_strategy_list; + strategy_list_size = s->pmo_dcn4.num_expanded_override_strategies; + } else { + /* use predefined strategy list */ + strategy_list = get_expanded_strategy_list(&pmo->init_data, display_config->display_config.num_streams); + strategy_list_size = get_num_expanded_strategies(&pmo->init_data, display_config->display_config.num_streams); + } - if (strategy_list_size == 0) + if (!strategy_list || strategy_list_size == 0) return false; s->pmo_dcn4.num_pstate_candidates = 0; @@ -1799,7 +1899,7 @@ bool pmo_dcn4_fams2_init_for_pstate_support(struct dml2_pmo_init_for_pstate_supp } if (s->pmo_dcn4.num_pstate_candidates > 0) { - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates - 1].allow_state_increase = true; + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.num_pstate_candidates-1].allow_state_increase = true; s->pmo_dcn4.cur_pstate_candidate = -1; return true; } else { @@ -1832,7 +1932,7 @@ static void reset_display_configuration(struct display_configuation_with_meta *d // Reset strategy to auto plane->overrides.uclk_pstate_change_strategy = dml2_uclk_pstate_change_strategy_auto; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_not_supported; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_na; } } @@ -1849,7 +1949,7 @@ static void setup_planes_for_drr_by_mask(struct display_configuation_with_meta * plane->overrides.uclk_pstate_change_strategy = 
dml2_uclk_pstate_change_strategy_force_drr; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_drr; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_drr; } } @@ -1867,7 +1967,7 @@ static void setup_planes_for_svp_by_mask(struct display_configuation_with_meta * for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { if (is_bit_set_in_bitfield(plane_mask, plane_index)) { stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp; } } @@ -1890,7 +1990,7 @@ static void setup_planes_for_svp_drr_by_mask(struct display_configuation_with_me for (plane_index = 0; plane_index < display_config->display_config.num_planes; plane_index++) { if (is_bit_set_in_bitfield(plane_mask, plane_index)) { stream_index = (char)display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_subvp_phantom_drr; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_svp_drr; } } @@ -1915,7 +2015,7 @@ static void setup_planes_for_vblank_by_mask(struct display_configuation_with_met plane->overrides.reserved_vblank_time_ns = (long)math_max2(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000.0, plane->overrides.reserved_vblank_time_ns); - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vblank; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vblank; } } @@ -1933,7 +2033,7 @@ static void setup_planes_for_vblank_drr_by_mask(struct display_configuation_with plane = &display_config->display_config.plane_descriptors[plane_index]; plane->overrides.reserved_vblank_time_ns = (long)(pmo->soc_bb->power_management_parameters.dram_clk_change_blackout_us * 1000); - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_vblank_drr; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vblank_drr; } } } @@ -1949,7 +2049,7 @@ static void setup_planes_for_vactive_by_mask(struct display_configuation_with_me if (is_bit_set_in_bitfield(plane_mask, plane_index)) { stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_vactive; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_vactive; if (!pmo->options->disable_vactive_det_fill_bw_pad) { display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = @@ -1970,7 +2070,7 @@ static void setup_planes_for_vactive_drr_by_mask(struct display_configuation_wit if (is_bit_set_in_bitfield(plane_mask, plane_index)) { stream_index = display_config->display_config.plane_descriptors[plane_index].stream_index; - display_config->stage3.pstate_switch_modes[plane_index] = dml2_uclk_pstate_support_method_fw_vactive_drr; + display_config->stage3.pstate_switch_modes[plane_index] = dml2_pstate_method_fw_vactive_drr; if (!pmo->options->disable_vactive_det_fill_bw_pad) { display_config->display_config.plane_descriptors[plane_index].overrides.max_vactive_det_fill_delay_us = @@ -1992,26 
+2092,26 @@ static bool setup_display_config(struct display_configuation_with_meta *display_ for (stream_index = 0; stream_index < display_config->display_config.num_streams; stream_index++) { - if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { + if (pmo->scratch.pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { success = false; break; - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive) { setup_planes_for_vactive_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank) { setup_planes_for_vblank_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp) { fams2_required = true; setup_planes_for_svp_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { fams2_required = true; setup_planes_for_vactive_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { fams2_required = true; setup_planes_for_vblank_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { fams2_required = true; setup_planes_for_svp_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); - } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { + } else if (scratch->pmo_dcn4.pstate_strategy_candidates[strategy_index].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { fams2_required = true; setup_planes_for_drr_by_mask(display_config, pmo, scratch->pmo_dcn4.stream_plane_mask[stream_index]); } @@ -2066,34 +2166,34 @@ bool pmo_dcn4_fams2_test_for_pstate_support(struct 
dml2_pmo_test_for_pstate_supp for (stream_index = 0; stream_index < in_out->base_display_config->display_config.num_streams; stream_index++) { struct dml2_fams2_meta *stream_fams2_meta = &s->pmo_dcn4.stream_fams2_meta[stream_index]; - if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vactive || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vactive_drr) { + if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vactive || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vactive_drr) { if (get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < (MIN_VACTIVE_MARGIN_PCT * in_out->instance->soc_bb->power_management_parameters.dram_clk_change_blackout_us) || get_vactive_det_fill_latency_delay_us(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) > stream_fams2_meta->method_vactive.max_vactive_det_fill_delay_us) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_vblank || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_vblank_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_vblank || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_vblank_drr) { if (get_minimum_reserved_time_us_for_planes(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < REQUIRED_RESERVED_TIME || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_VBLANK) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp || - s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_svp_drr) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp || + s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_svp_drr) { if (in_out->base_display_config->stage3.stream_svp_meta[stream_index].valid == false) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_fw_drr) { - if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], dml2_pmo_pstate_strategy_fw_drr) || + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_fw_drr) { + if (!all_planes_match_method(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index], 
dml2_pstate_method_fw_drr) || get_vactive_pstate_margin(in_out->base_display_config, s->pmo_dcn4.stream_plane_mask[stream_index]) < MIN_VACTIVE_MARGIN_DRR) { p_state_supported = false; break; } - } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pmo_pstate_strategy_na) { + } else if (s->pmo_dcn4.pstate_strategy_candidates[s->pmo_dcn4.cur_pstate_candidate].per_stream_pstate_method[stream_index] == dml2_pstate_method_na) { p_state_supported = false; break; } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h index 0c25bd3e9ac0..6baab7ad6ecc 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_dcn4_fams2.h @@ -23,4 +23,11 @@ bool pmo_dcn4_fams2_init_for_stutter(struct dml2_pmo_init_for_stutter_in_out *in bool pmo_dcn4_fams2_test_for_stutter(struct dml2_pmo_test_for_stutter_in_out *in_out); bool pmo_dcn4_fams2_optimize_for_stutter(struct dml2_pmo_optimize_for_stutter_in_out *in_out); +void pmo_dcn4_fams2_expand_base_pstate_strategies( + const struct dml2_pmo_pstate_strategy *base_strategies_list, + const unsigned int num_base_strategies, + const unsigned int stream_count, + struct dml2_pmo_pstate_strategy *expanded_strategy_list, + unsigned int *num_expanded_strategies); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c index add51d41a515..7ed0242a4b33 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_pmo/dml2_pmo_factory.c @@ -72,7 +72,6 @@ bool dml2_pmo_create(enum dml2_project_id project_id, struct dml2_pmo_instance * out->init_for_stutter = pmo_dcn4_fams2_init_for_stutter; out->test_for_stutter = pmo_dcn4_fams2_test_for_stutter; out->optimize_for_stutter = pmo_dcn4_fams2_optimize_for_stutter; - result = true; break; case dml2_project_invalid: diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c new file mode 100644 index 000000000000..5f6dfc24df69 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_interfaces.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
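The hunks above replace the old dml2_pmo_pstate_strategy_* enumerators with the shared dml2_pstate_method_* names, and setup_display_config() marks FAMS2 as required whenever a stream uses one of the fw_* methods. A minimal, self-contained sketch of that relationship follows; only the enumerator names come from the hunks, while their order, numeric values and the comment glosses are assumptions.

/* Illustrative sketch: enumerator names as used in the hunks above; ordering,
 * values and descriptions are assumptions, not the real DML2 definitions. */
#include <stdbool.h>
#include <stdio.h>

enum dml2_pstate_method {
	dml2_pstate_method_na = 0,		/* no method assigned */
	dml2_pstate_method_vactive,		/* switch during vertical active */
	dml2_pstate_method_vblank,		/* switch during vertical blank */
	dml2_pstate_method_fw_svp,		/* firmware-assisted sub-viewport */
	dml2_pstate_method_fw_svp_drr,
	dml2_pstate_method_fw_vactive_drr,
	dml2_pstate_method_fw_vblank_drr,
	dml2_pstate_method_fw_drr,		/* firmware-assisted DRR */
};

/* Mirrors the pattern in setup_display_config(): every fw_* method makes the
 * configuration depend on FAMS2 firmware support. */
static bool method_requires_fams2(enum dml2_pstate_method m)
{
	switch (m) {
	case dml2_pstate_method_fw_svp:
	case dml2_pstate_method_fw_svp_drr:
	case dml2_pstate_method_fw_vactive_drr:
	case dml2_pstate_method_fw_vblank_drr:
	case dml2_pstate_method_fw_drr:
		return true;
	default:
		return false;
	}
}

int main(void)
{
	printf("vactive requires fams2: %d\n", method_requires_fams2(dml2_pstate_method_vactive));
	printf("fw_drr  requires fams2: %d\n", method_requires_fams2(dml2_pstate_method_fw_drr));
	return 0;
}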
+ +#include "dml_top.h" +#include "dml2_internal_shared_types.h" +#include "dml2_top_soc15.h" + +unsigned int dml2_get_instance_size_bytes(void) +{ + return sizeof(struct dml2_instance); +} + +bool dml2_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + switch (in_out->options.project_id) { + case dml2_project_dcn4x_stage1: + return false; + case dml2_project_dcn4x_stage2: + case dml2_project_dcn4x_stage2_auto_drr_svp: + return dml2_top_soc15_initialize_instance(in_out); + case dml2_project_invalid: + default: + return false; + } +} + +bool dml2_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.check_mode_supported) + return false; + + return in_out->dml2_instance->funcs.check_mode_supported(in_out); +} + +bool dml2_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.build_mode_programming) + return false; + + return in_out->dml2_instance->funcs.build_mode_programming(in_out); +} + +bool dml2_build_mcache_programming(struct dml2_build_mcache_programming_in_out *in_out) +{ + if (!in_out->dml2_instance->funcs.build_mcache_programming) + return false; + + return in_out->dml2_instance->funcs.build_mcache_programming(in_out); +} + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c new file mode 100644 index 000000000000..db0a30fdb58d --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.c @@ -0,0 +1,4 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h new file mode 100644 index 000000000000..14d0ae03dce6 --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_legacy.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. + +#ifndef __DML2_TOP_LEGACY_H__ +#define __DML2_TOP_LEGACY_H__ +#include "dml2_internal_shared_types.h" +bool dml2_top_legacy_initialize_instance(struct dml2_initialize_instance_in_out *in_out); +#endif /* __DML2_TOP_LEGACY_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c deleted file mode 100644 index d0e026d981b5..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.c +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- -#include "dml2_top_optimization.h" -#include "dml2_internal_shared_types.h" -#include "dml_top_mcache.h" - -static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src) -{ - memcpy(dst, src, sizeof(struct display_configuation_with_meta)); -} - -bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; - - state->performed = true; - - return true; -} - -bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; - - return state->min_clk_index_for_latency == 0; -} - -bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params) -{ - bool result = false; - - if (params->display_config->stage1.min_clk_index_for_latency > 0) { - copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); - params->optimized_display_config->stage1.min_clk_index_for_latency--; - result = true; - } - - return result; -} - -bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - bool mcache_success = false; - bool result = false; - - memset(l, 0, sizeof(struct dml2_optimization_test_function_locals)); - - l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml; - l->test_mcache.calc_mcache_count_params.display_config = ¶ms->display_config->display_config; - l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations; - - result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations - - if (result) { - l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations; - l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes; - - dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params); - - l->test_mcache.validate_admissibility_params.dml2_instance = params->dml; - l->test_mcache.validate_admissibility_params.display_cfg = ¶ms->display_config->display_config; - l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations; - l->test_mcache.validate_admissibility_params.cfg_support_info = ¶ms->display_config->mode_support_result.cfg_support_info; - - mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation works - - memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES); - } - - return mcache_success; -} - -bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - bool optimize_success = false; - - if (params->last_candidate_supported == false) - return false; - - copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); - - 
l->optimize_mcache.optimize_mcache_params.instance = ¶ms->dml->pmo_instance; - l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support; - l->optimize_mcache.optimize_mcache_params.display_config = ¶ms->display_config->display_config; - l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = ¶ms->optimized_display_config->display_config; - l->optimize_mcache.optimize_mcache_params.cfg_support_info = ¶ms->optimized_display_config->mode_support_result.cfg_support_info; - - optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params); - - return optimize_success; -} - -bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->vmin.init_params.instance = ¶ms->dml->pmo_instance; - l->vmin.init_params.base_display_config = params->display_config; - return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params); -} - -bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->test_vmin.pmo_test_vmin_params.instance = ¶ms->dml->pmo_instance; - l->test_vmin.pmo_test_vmin_params.display_config = params->display_config; - l->test_vmin.pmo_test_vmin_params.vmin_limits = ¶ms->dml->soc_bbox.vmin_limit; - return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params); -} - -bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - if (params->last_candidate_supported == false) - return false; - - l->optimize_vmin.pmo_optimize_vmin_params.instance = ¶ms->dml->pmo_instance; - l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config; - l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config; - return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params); -} - -bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) -{ - bool test_passed = false; - bool optimize_succeeded = true; - bool candidate_validation_passed = true; - struct optimization_init_function_params init_params = { 0 }; - struct optimization_test_function_params test_params = { 0 }; - struct optimization_optimize_function_params optimize_params = { 0 }; - - if (!params->dml || - !params->optimize_function || - !params->test_function || - !params->display_config || - !params->optimized_display_config) - return false; - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); - - init_params.locals = &l->init_function_locals; - init_params.dml = params->dml; - init_params.display_config = &l->cur_candidate_display_cfg; - - if (params->init_function && !params->init_function(&init_params)) - return false; - - test_params.locals = &l->test_function_locals; - test_params.dml = params->dml; - test_params.display_config = &l->cur_candidate_display_cfg; - - test_passed = params->test_function(&test_params); - - while (!test_passed && optimize_succeeded) { - memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params)); - - optimize_params.locals = &l->optimize_function_locals; - 
optimize_params.dml = params->dml; - optimize_params.display_config = &l->cur_candidate_display_cfg; - optimize_params.optimized_display_config = &l->next_candidate_display_cfg; - optimize_params.last_candidate_supported = candidate_validation_passed; - - optimize_succeeded = params->optimize_function(&optimize_params); - - if (optimize_succeeded) { - l->mode_support_params.instance = ¶ms->dml->core_instance; - l->mode_support_params.display_cfg = &l->next_candidate_display_cfg; - l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; - - if (l->next_candidate_display_cfg.stage3.performed) - l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency; - else - l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency; - - candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params); - - l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; - } - - if (optimize_succeeded && candidate_validation_passed) { - memset(&test_params, 0, sizeof(struct optimization_test_function_params)); - test_params.locals = &l->test_function_locals; - test_params.dml = params->dml; - test_params.display_config = &l->next_candidate_display_cfg; - test_passed = params->test_function(&test_params); - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg); - - // If optimization is not all or nothing, then store partial progress in output - if (!params->all_or_nothing) - copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg); - } - } - - if (test_passed) - copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); - - return test_passed; -} - -bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) -{ - int highest_state, lowest_state, cur_state; - bool supported = false; - - if (!params->dml || - !params->optimize_function || - !params->test_function || - !params->display_config || - !params->optimized_display_config) - return false; - - copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); - highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; - lowest_state = 0; - - while (highest_state > lowest_state) { - cur_state = (highest_state + lowest_state) / 2; - - l->mode_support_params.instance = ¶ms->dml->core_instance; - l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg; - l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; - l->mode_support_params.min_clk_index = cur_state; - - supported = params->dml->core_instance.mode_support(&l->mode_support_params); - - if (supported) { - l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; - highest_state = cur_state; - } else { - lowest_state = cur_state + 1; - } - } - l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state; - - copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); - - return true; -} - -bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; - 
l->uclk_pstate.init_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params); -} - -bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->uclk_pstate.test_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.test_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params); -} - -bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - l->uclk_pstate.optimize_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.optimize_params.base_display_config = params->display_config; - l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config; - l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported; - - return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params); -} - -bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params) -{ - struct dml2_optimization_init_function_locals *l = params->locals; - - l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; - l->uclk_pstate.init_params.base_display_config = params->display_config; - - return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params); -} - -bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params) -{ - struct dml2_optimization_test_function_locals *l = params->locals; - - l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; - l->stutter.stutter_params.base_display_config = params->display_config; - return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params); -} - -bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params) -{ - struct dml2_optimization_optimize_function_locals *l = params->locals; - - l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; - l->stutter.stutter_params.base_display_config = params->display_config; - l->stutter.stutter_params.optimized_display_config = params->optimized_display_config; - l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported; - return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params); -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h deleted file mode 100644 index 9f22ab33eab1..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_optimization.h +++ /dev/null @@ -1,33 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. 
- -#ifndef __DML2_TOP_OPTIMIZATION_H__ -#define __DML2_TOP_OPTIMIZATION_H__ - -#include "dml2_external_lib_deps.h" -#include "dml2_internal_shared_types.h" - -bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params); -bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params); - -bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params); -bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params); - -bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params); - -bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params); -bool dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params); - -bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params); -bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params); - -bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params); -bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params); -bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params); - -#endif diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c new file mode 100644 index 000000000000..b39029c0e56f --- /dev/null +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.c @@ -0,0 +1,1177 @@ +// SPDX-License-Identifier: MIT +// +// Copyright 2024 Advanced Micro Devices, Inc. 
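dml2_top_optimization.c and its header are deleted above; the same helpers reappear below as static functions inside the new dml2_top_soc15.c. The heart of that code is a generic optimization phase driver: start from a candidate configuration, test it, and while the test fails keep asking the optimize hook for a new candidate and re-running mode support on it. A minimal standalone sketch of that control flow, with the DML2 structures replaced by illustrative stand-ins:

#include <stdbool.h>
#include <stdio.h>

/* Toy candidate: a single knob standing in for a display configuration. */
struct candidate {
	int knob;
};

static bool test_fn(const struct candidate *c)
{
	return c->knob <= 2;			/* optimization target reached? */
}

static bool optimize_fn(struct candidate *c)
{
	if (c->knob == 0)
		return false;			/* nothing left to try */
	c->knob--;				/* propose the next candidate */
	return true;
}

static bool validate_fn(const struct candidate *c)
{
	return c->knob >= 0;			/* stand-in for mode_support() */
}

/* Same loop shape as dml2_top_optimization_perform_optimization_phase():
 * test, and while failing, optimize -> re-validate -> re-test. */
static bool run_phase(struct candidate *c)
{
	bool passed = test_fn(c);
	bool can_optimize = true;
	bool valid = true;

	while (!passed && can_optimize) {
		can_optimize = optimize_fn(c);
		if (can_optimize)
			valid = validate_fn(c);
		if (can_optimize && valid)
			passed = test_fn(c);
	}
	return passed;
}

int main(void)
{
	struct candidate c = { .knob = 5 };

	printf("phase %s, knob = %d\n", run_phase(&c) ? "passed" : "failed", c.knob);
	return 0;
}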
+ +#include "dml2_top_soc15.h" +#include "dml2_mcg_factory.h" +#include "dml2_dpmm_factory.h" +#include "dml2_core_factory.h" +#include "dml2_pmo_factory.h" +#include "lib_float_math.h" +#include "dml2_debug.h" +static void setup_unoptimized_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = dml->min_clk_table.dram_bw_table.num_entries - 1; //dml->min_clk_table.clean_me_up.soc_bb.num_states - 1; +} + +static void setup_speculative_display_config_with_meta(const struct dml2_instance *dml, struct display_configuation_with_meta *out, const struct dml2_display_cfg *display_config) +{ + memcpy(&out->display_config, display_config, sizeof(struct dml2_display_cfg)); + out->stage1.min_clk_index_for_latency = 0; +} + +static void copy_display_configuration_with_meta(struct display_configuation_with_meta *dst, const struct display_configuation_with_meta *src) +{ + memcpy(dst, src, sizeof(struct display_configuation_with_meta)); +} + +static bool dml2_top_optimization_init_function_min_clk_for_latency(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + state->performed = true; + + return true; +} + +static bool dml2_top_optimization_test_function_min_clk_for_latency(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_stage1_state *state = ¶ms->display_config->stage1; + + return state->min_clk_index_for_latency == 0; +} + +static bool dml2_top_optimization_optimize_function_min_clk_for_latency(const struct optimization_optimize_function_params *params) +{ + bool result = false; + + if (params->display_config->stage1.min_clk_index_for_latency > 0) { + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + params->optimized_display_config->stage1.min_clk_index_for_latency--; + result = true; + } + + return result; +} + +static bool dml2_top_optimization_test_function_mcache(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + bool mcache_success = false; + bool result = false; + + memset(l, 0, sizeof(struct dml2_optimization_test_function_locals)); + + l->test_mcache.calc_mcache_count_params.dml2_instance = params->dml; + l->test_mcache.calc_mcache_count_params.display_config = ¶ms->display_config->display_config; + l->test_mcache.calc_mcache_count_params.mcache_allocations = params->display_config->stage2.mcache_allocations; + + result = dml2_top_mcache_calc_mcache_count_and_offsets(&l->test_mcache.calc_mcache_count_params); // use core to get the basic mcache_allocations + + if (result) { + l->test_mcache.assign_global_mcache_ids_params.allocations = params->display_config->stage2.mcache_allocations; + l->test_mcache.assign_global_mcache_ids_params.num_allocations = params->display_config->display_config.num_planes; + + dml2_top_mcache_assign_global_mcache_ids(&l->test_mcache.assign_global_mcache_ids_params); + + l->test_mcache.validate_admissibility_params.dml2_instance = params->dml; + l->test_mcache.validate_admissibility_params.display_cfg = ¶ms->display_config->display_config; + l->test_mcache.validate_admissibility_params.mcache_allocations = params->display_config->stage2.mcache_allocations; + 
l->test_mcache.validate_admissibility_params.cfg_support_info = ¶ms->display_config->mode_support_result.cfg_support_info; + + mcache_success = dml2_top_mcache_validate_admissability(&l->test_mcache.validate_admissibility_params); // also find the shift to make mcache allocation works + + memcpy(params->display_config->stage2.per_plane_mcache_support, l->test_mcache.validate_admissibility_params.per_plane_status, sizeof(bool) * DML2_MAX_PLANES); + } + + return mcache_success; +} + +static bool dml2_top_optimization_optimize_function_mcache(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + bool optimize_success = false; + + if (params->last_candidate_supported == false) + return false; + + copy_display_configuration_with_meta(params->optimized_display_config, params->display_config); + + l->optimize_mcache.optimize_mcache_params.instance = ¶ms->dml->pmo_instance; + l->optimize_mcache.optimize_mcache_params.dcc_mcache_supported = params->display_config->stage2.per_plane_mcache_support; + l->optimize_mcache.optimize_mcache_params.display_config = ¶ms->display_config->display_config; + l->optimize_mcache.optimize_mcache_params.optimized_display_cfg = ¶ms->optimized_display_config->display_config; + l->optimize_mcache.optimize_mcache_params.cfg_support_info = ¶ms->optimized_display_config->mode_support_result.cfg_support_info; + + optimize_success = params->dml->pmo_instance.optimize_dcc_mcache(&l->optimize_mcache.optimize_mcache_params); + + return optimize_success; +} + +static bool dml2_top_optimization_init_function_vmin(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->vmin.init_params.instance = ¶ms->dml->pmo_instance; + l->vmin.init_params.base_display_config = params->display_config; + return params->dml->pmo_instance.init_for_vmin(&l->vmin.init_params); +} + +static bool dml2_top_optimization_test_function_vmin(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->test_vmin.pmo_test_vmin_params.instance = ¶ms->dml->pmo_instance; + l->test_vmin.pmo_test_vmin_params.display_config = params->display_config; + l->test_vmin.pmo_test_vmin_params.vmin_limits = ¶ms->dml->soc_bbox.vmin_limit; + return params->dml->pmo_instance.test_for_vmin(&l->test_vmin.pmo_test_vmin_params); +} + +static bool dml2_top_optimization_optimize_function_vmin(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + if (params->last_candidate_supported == false) + return false; + + l->optimize_vmin.pmo_optimize_vmin_params.instance = ¶ms->dml->pmo_instance; + l->optimize_vmin.pmo_optimize_vmin_params.base_display_config = params->display_config; + l->optimize_vmin.pmo_optimize_vmin_params.optimized_display_config = params->optimized_display_config; + return params->dml->pmo_instance.optimize_for_vmin(&l->optimize_vmin.pmo_optimize_vmin_params); +} + +static bool dml2_top_optimization_init_function_uclk_pstate(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.init_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.init_for_uclk_pstate(&l->uclk_pstate.init_params); +} + +static bool 
dml2_top_optimization_test_function_uclk_pstate(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->uclk_pstate.test_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.test_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.test_for_uclk_pstate(&l->uclk_pstate.test_params); +} + +static bool dml2_top_optimization_optimize_function_uclk_pstate(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + l->uclk_pstate.optimize_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.optimize_params.base_display_config = params->display_config; + l->uclk_pstate.optimize_params.optimized_display_config = params->optimized_display_config; + l->uclk_pstate.optimize_params.last_candidate_failed = !params->last_candidate_supported; + + return params->dml->pmo_instance.optimize_for_uclk_pstate(&l->uclk_pstate.optimize_params); +} + +static bool dml2_top_optimization_init_function_stutter(const struct optimization_init_function_params *params) +{ + struct dml2_optimization_init_function_locals *l = params->locals; + + l->uclk_pstate.init_params.instance = ¶ms->dml->pmo_instance; + l->uclk_pstate.init_params.base_display_config = params->display_config; + + return params->dml->pmo_instance.init_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_test_function_stutter(const struct optimization_test_function_params *params) +{ + struct dml2_optimization_test_function_locals *l = params->locals; + + l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; + l->stutter.stutter_params.base_display_config = params->display_config; + return params->dml->pmo_instance.test_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_optimize_function_stutter(const struct optimization_optimize_function_params *params) +{ + struct dml2_optimization_optimize_function_locals *l = params->locals; + + l->stutter.stutter_params.instance = ¶ms->dml->pmo_instance; + l->stutter.stutter_params.base_display_config = params->display_config; + l->stutter.stutter_params.optimized_display_config = params->optimized_display_config; + l->stutter.stutter_params.last_candidate_failed = !params->last_candidate_supported; + return params->dml->pmo_instance.optimize_for_stutter(&l->stutter.stutter_params); +} + +static bool dml2_top_optimization_perform_optimization_phase(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) +{ + bool test_passed = false; + bool optimize_succeeded = true; + bool candidate_validation_passed = true; + struct optimization_init_function_params init_params = { 0 }; + struct optimization_test_function_params test_params = { 0 }; + struct optimization_optimize_function_params optimize_params = { 0 }; + + if (!params->dml || + !params->optimize_function || + !params->test_function || + !params->display_config || + !params->optimized_display_config) + return false; + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); + + init_params.locals = &l->init_function_locals; + init_params.dml = params->dml; + init_params.display_config = &l->cur_candidate_display_cfg; + + if (params->init_function && !params->init_function(&init_params)) + return false; + + test_params.locals = &l->test_function_locals; + test_params.dml = params->dml; + test_params.display_config = 
&l->cur_candidate_display_cfg; + + test_passed = params->test_function(&test_params); + + while (!test_passed && optimize_succeeded) { + memset(&optimize_params, 0, sizeof(struct optimization_optimize_function_params)); + + optimize_params.locals = &l->optimize_function_locals; + optimize_params.dml = params->dml; + optimize_params.display_config = &l->cur_candidate_display_cfg; + optimize_params.optimized_display_config = &l->next_candidate_display_cfg; + optimize_params.last_candidate_supported = candidate_validation_passed; + + optimize_succeeded = params->optimize_function(&optimize_params); + + if (optimize_succeeded) { + l->mode_support_params.instance = ¶ms->dml->core_instance; + l->mode_support_params.display_cfg = &l->next_candidate_display_cfg; + l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; + + if (l->next_candidate_display_cfg.stage3.performed) + l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage3.min_clk_index_for_latency; + else + l->mode_support_params.min_clk_index = l->next_candidate_display_cfg.stage1.min_clk_index_for_latency; + candidate_validation_passed = params->dml->core_instance.mode_support(&l->mode_support_params); + l->next_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; + } + + if (optimize_succeeded && candidate_validation_passed) { + memset(&test_params, 0, sizeof(struct optimization_test_function_params)); + test_params.locals = &l->test_function_locals; + test_params.dml = params->dml; + test_params.display_config = &l->next_candidate_display_cfg; + test_passed = params->test_function(&test_params); + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, &l->next_candidate_display_cfg); + + // If optimization is not all or nothing, then store partial progress in output + if (!params->all_or_nothing) + copy_display_configuration_with_meta(params->optimized_display_config, &l->next_candidate_display_cfg); + } + } + + if (test_passed) + copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); + + return test_passed; +} + +static bool dml2_top_optimization_perform_optimization_phase_1(struct dml2_optimization_phase_locals *l, const struct optimization_phase_params *params) +{ + int highest_state, lowest_state, cur_state; + bool supported = false; + + if (!params->dml || + !params->optimize_function || + !params->test_function || + !params->display_config || + !params->optimized_display_config) + return false; + + copy_display_configuration_with_meta(&l->cur_candidate_display_cfg, params->display_config); + highest_state = l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency; + lowest_state = 0; + + while (highest_state > lowest_state) { + cur_state = (highest_state + lowest_state) / 2; + + l->mode_support_params.instance = ¶ms->dml->core_instance; + l->mode_support_params.display_cfg = &l->cur_candidate_display_cfg; + l->mode_support_params.min_clk_table = ¶ms->dml->min_clk_table; + l->mode_support_params.min_clk_index = cur_state; + supported = params->dml->core_instance.mode_support(&l->mode_support_params); + + if (supported) { + l->cur_candidate_display_cfg.mode_support_result = l->mode_support_params.mode_support_result; + highest_state = cur_state; + } else { + lowest_state = cur_state + 1; + } + } + l->cur_candidate_display_cfg.stage1.min_clk_index_for_latency = lowest_state; + + copy_display_configuration_with_meta(params->optimized_display_config, &l->cur_candidate_display_cfg); + + return true; +} + 
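The phase-1 variant that ends here does not step candidate by candidate; it binary-searches for the lowest clock-state index that still passes mode support, which is valid as long as support is monotonic in the state index. A self-contained sketch of that search, with core_instance.mode_support() replaced by a simple predicate:

#include <stdbool.h>
#include <stdio.h>

/* Stand-in predicate: assume states 3..N-1 pass mode support. */
static bool mode_supported(int state_index)
{
	return state_index >= 3;
}

/* Same search as dml2_top_optimization_perform_optimization_phase_1():
 * narrow [lowest, highest] until it collapses on the first passing index. */
static int lowest_supported_state(int highest_state)
{
	int lowest = 0;

	while (highest_state > lowest) {
		int cur = (highest_state + lowest) / 2;

		if (mode_supported(cur))
			highest_state = cur;	/* cur passes: answer is at or below */
		else
			lowest = cur + 1;	/* cur fails: answer is above */
	}
	return lowest;
}

int main(void)
{
	printf("min_clk_index_for_latency = %d\n", lowest_supported_state(7));
	return 0;
}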
+/* +* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming. +* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means +* that the horizontal viewport does not span more than 2 cache slices. +* +* It optionally also can apply a constant shift to all the cache boundaries. +*/ +static const uint32_t MCACHE_ID_UNASSIGNED = 0xF; +static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF; + +static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift, + int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset) +{ + const int MAX_VP = 0xFFFFFF; + int left_cache_id; + int right_cache_id; + int range_start; + int range_end; + bool success = false; + + if (num_boundaries <= 1) { + if (first_offset && second_offset) { + *first_offset = 0; + *second_offset = -1; + } + success = true; + return success; + } else { + range_start = 0; + for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) { + range_end = mcache_boundaries[left_cache_id] - shift - 1; + + if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end) + break; + + range_start = range_end + 1; + } + + range_end = MAX_VP; + for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) { + if (right_cache_id >= 0) + range_start = mcache_boundaries[right_cache_id] - shift; + else + range_start = 0; + + if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) { + break; + } + range_end = range_start - 1; + } + right_cache_id = (right_cache_id + 1) % num_boundaries; + + if (right_cache_id == left_cache_id) { + if (first_offset && second_offset) { + *first_offset = left_cache_id; + *second_offset = -1; + } + success = true; + } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) { + if (first_offset && second_offset) { + *first_offset = left_cache_id; + *second_offset = right_cache_id; + } + success = true; + } + } + + return success; +} + +/* +* For a given set of pipe start/end x positions, checks to see it can support the input mcache splitting. +* It also attempts to "optimize" by finding a shift if the default 0 shift does not work. +*/ +static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries, + int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift) +{ + int max_shift = 0xFFFF; + unsigned int pipe_index; + unsigned int i, slice_width; + bool success = false; + + for (i = 0; i < num_boundaries; i++) { + if (i == 0) + slice_width = mcache_boundaries[i]; + else + slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1]; + + if (max_shift > (int)slice_width) { + max_shift = slice_width; + } + } + + for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) { + success = true; + for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) { + if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift, + pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) { + success = false; + break; + } + } + if (success) + break; + } + + return success; +} + +/* +* Counts the number of elements inside input array within the given span length. +* Formally, what is the size of the largest subset of the array where the largest and smallest element +* differ no more than the span. 
+*/ +static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span) +{ + unsigned int i; + unsigned int span_start_value; + unsigned int span_start_index; + unsigned int greatest_element_count; + + if (array_size == 0) + return 1; + + if (span == 0) + return array_size > 0 ? 1 : 0; + + span_start_value = 0; + span_start_index = 0; + greatest_element_count = 0; + + while (span_start_index < array_size) { + for (i = span_start_index; i < array_size; i++) { + if (array[i] - span_start_value <= span) { + if (i - span_start_index + 1 > greatest_element_count) { + greatest_element_count = i - span_start_index + 1; + } + } else + break; + } + + span_start_index++; + + if (span_start_index < array_size) { + span_start_value = array[span_start_index - 1] + 1; + } + } + + return greatest_element_count; +} + +static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes, + enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end) +{ + int i, slice_width; + const char MAX_SCL_VP_OVERLAP = 3; + bool success = false; + + switch (scaling_transform) { + case dml2_scaling_transform_centered: + case dml2_scaling_transform_aspect_ratio: + case dml2_scaling_transform_fullscreen: + slice_width = full_vp_width / num_pipes; + for (i = 0; i < num_pipes; i++) { + pipe_vp_x_start[i] = i * slice_width; + pipe_vp_x_end[i] = (i + 1) * slice_width - 1; + + if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP) + pipe_vp_x_start[i] = 0; + else + pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP; + + if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1) + pipe_vp_x_end[i] = full_vp_width - 1; + else + pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP; + } + break; + case dml2_scaling_transform_explicit: + default: + success = false; + break; + } + + return success; +} + +bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params) +{ + struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; + struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals; + + const int MAX_PIXEL_OVERLAP = 6; + int max_per_pipe_vp_p0 = 0; + int max_per_pipe_vp_p1 = 0; + int temp, p0shift, p1shift; + unsigned int plane_index = 0; + unsigned int i; + unsigned int odm_combine_factor; + unsigned int mpc_combine_factor; + unsigned int num_dpps; + unsigned int num_boundaries; + enum dml2_scaling_transform scaling_transform; + const struct dml2_plane_parameters *plane; + const struct dml2_stream_parameters *stream; + + bool p0pass = false; + bool p1pass = false; + bool all_pass = true; + + for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) { + if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable) + continue; + + plane = ¶ms->display_cfg->plane_descriptors[plane_index]; + stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; + + num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; + + if (odm_combine_factor == 1) + num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; + else + mpc_combine_factor = 1; + + if (odm_combine_factor > 1) { + max_per_pipe_vp_p0 = plane->surface.plane0.width; + temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor); + if (temp < max_per_pipe_vp_p0) + max_per_pipe_vp_p0 = temp; + + 
max_per_pipe_vp_p1 = plane->surface.plane1.width; + temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor); + + if (temp < max_per_pipe_vp_p1) + max_per_pipe_vp_p1 = temp; + } else { + max_per_pipe_vp_p0 = plane->surface.plane0.width / mpc_combine_factor; + max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor; + } + + max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP; + max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP; + + p0shift = 0; + p1shift = 0; + + // The last element in the unshifted boundary array will always be the first pixel outside the + // plane, which means theres no mcache associated with it, so -1 + num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { + p0pass = true; + } + num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; + if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { + p1pass = true; + } + + if (!p0pass || !p1pass) { + if (odm_combine_factor > 1) { + num_dpps = odm_combine_factor; + scaling_transform = plane->composition.scaling_transform; + } else { + num_dpps = mpc_combine_factor; + scaling_transform = dml2_scaling_transform_fullscreen; + } + + if (!p0pass) { + if (plane->composition.viewport.stationary) { + calculate_h_split_for_scaling_transform(plane->surface.plane0.width, + stream->timing.h_active, num_dpps, scaling_transform, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); + p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, + params->mcache_allocations[plane_index].num_mcaches_plane0, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps, + params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift); + } + } + if (!p1pass) { + if (plane->composition.viewport.stationary) { + calculate_h_split_for_scaling_transform(plane->surface.plane1.width, + stream->timing.h_active, num_dpps, scaling_transform, + &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); + p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, + params->mcache_allocations[plane_index].num_mcaches_plane1, + &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps, + params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift); + } + } + } + + if (p0pass && p1pass) { + for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) { + params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift; + } + for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) { + params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift; + } + } + + params->per_plane_status[plane_index] = p0pass && p1pass; + all_pass &= p0pass && p1pass; + } + + return all_pass; +} + +static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs) +{ + // Initialize all entries to special valid 
MCache ID and special valid split coordinate + per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED; + + per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED; + + per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED; + + per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; + per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED; +} + +void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params) +{ + int i; + unsigned int j; + int next_unused_cache_id = 0; + + for (i = 0; i < params->num_allocations; i++) { + if (!params->allocations[i].valid) + continue; + + for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { + params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++; + } + for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { + params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++; + } + + // The "psuedo-last" slice is always wrapped around + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] = + params->allocations[i].global_mcache_ids_plane0[0]; + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] = + params->allocations[i].global_mcache_ids_plane1[0]; + + // If we need dedicated caches for mall requesting, then we assign them here. 
+ if (params->allocations[i].requires_dedicated_mall_mcache) { + for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { + params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++; + } + for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { + params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++; + } + + // The "psuedo-last" slice is always wrapped around + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] = + params->allocations[i].global_mcache_ids_mall_plane0[0]; + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] = + params->allocations[i].global_mcache_ids_mall_plane1[0]; + } + + // If P0 and P1 are sharing caches, then it means the largest mcache IDs for p0 and p1 can be the same + // since mcache IDs are always ascending, then it means the largest mcacheID of p1 should be the + // largest mcacheID of P0 + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && + params->allocations[i].last_slice_sharing.plane0_plane1) { + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + + // If we need dedicated caches handle last slice sharing + if (params->allocations[i].requires_dedicated_mall_mcache) { + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && + params->allocations[i].last_slice_sharing.plane0_plane1) { + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + // If mall_comb_mcache_l is set then it means that largest mcache ID for MALL p0 can be same as regular read p0 + if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) { + params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] = + params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; + } + // If mall_comb_mcache_c is set then it means that largest mcache ID for MALL p1 can be same as regular + // read p1 (which can be same as regular read p0 if plane0_plane1 is also set) + if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) { + params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = + params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1]; + } + } + + // If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting + if (!params->allocations[i].requires_dedicated_mall_mcache) { + memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0, + sizeof(params->allocations[i].global_mcache_ids_mall_plane0)); + memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1, + sizeof(params->allocations[i].global_mcache_ids_mall_plane1)); + } + } +} + +bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params) +{ + struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; + 
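The ID-assignment rules implemented above are easiest to see with a small worked example. The following standalone sketch (hypothetical names and sizes, not the DML2 structures) hands out ascending global IDs for plane0 then plane1, wraps the pseudo-last slice back around to the first ID, and collapses plane1's last real slice onto plane0's when the two planes share their last mcache slice:

#include <stdio.h>

#define MAX_SLICES 8

int main(void)
{
	int p0_ids[MAX_SLICES + 1], p1_ids[MAX_SLICES + 1];
	int num_p0 = 3, num_p1 = 2;
	int share_last_slice = 1;	/* stands in for last_slice_sharing.plane0_plane1 */
	int next_id = 0, i;

	/* Global mcache IDs are handed out in ascending order, plane0 first. */
	for (i = 0; i < num_p0; i++)
		p0_ids[i] = next_id++;
	for (i = 0; i < num_p1; i++)
		p1_ids[i] = next_id++;

	/* The "pseudo-last" slice always wraps around to the first ID. */
	p0_ids[num_p0] = p0_ids[0];
	p1_ids[num_p1] = p1_ids[0];

	/* Last-slice sharing: plane1's last real slice reuses plane0's last ID. */
	if (share_last_slice && num_p0 > 0 && num_p1 > 0)
		p1_ids[num_p1 - 1] = p0_ids[num_p0 - 1];

	for (i = 0; i <= num_p0; i++)
		printf("plane0[%d] = %d\n", i, p0_ids[i]);	/* 0 1 2 0 */
	for (i = 0; i <= num_p1; i++)
		printf("plane1[%d] = %d\n", i, p1_ids[i]);	/* 3 2 3 */
	return 0;
}

When dedicated MALL mcaches are required, the same procedure runs a second time with fresh IDs for the mall arrays; otherwise the mall assignments are simply copied from the regular ones, as the memcpy branch above shows.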
struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals; + + unsigned int total_mcaches_required; + unsigned int i; + bool result = false; + + if (dml->soc_bbox.num_dcc_mcaches == 0) { + return true; + } + + total_mcaches_required = 0; + l->calc_mcache_params.instance = &dml->core_instance; + for (i = 0; i < params->display_config->num_planes; i++) { + if (!params->display_config->plane_descriptors[i].surface.dcc.enable) { + memset(¶ms->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation)); + continue; + } + + l->calc_mcache_params.plane_descriptor = ¶ms->display_config->plane_descriptors[i]; + l->calc_mcache_params.mcache_allocation = ¶ms->mcache_allocations[i]; + l->calc_mcache_params.plane_index = i; + + if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) { + result = false; + break; + } + + if (params->mcache_allocations[i].valid) { + total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1; + if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1) + total_mcaches_required--; + } + } + dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); + + if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { + result = false; + } else { + result = true; + } + + return result; +} + +static bool dml2_top_soc15_check_mode_supported(struct dml2_check_mode_supported_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_check_mode_supported_locals *l = &dml->scratch.check_mode_supported_locals; + struct dml2_display_cfg_programming *dpmm_programming = &dml->dpmm_instance.dpmm_scratch.programming; + + bool result = false; + bool mcache_success = false; + memset(dpmm_programming, 0, sizeof(struct dml2_display_cfg_programming)); + + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (result) { + struct optimization_phase_params mcache_phase = { + .dml = dml, + .display_config = &l->base_display_config_with_meta, + .test_function = dml2_top_optimization_test_function_mcache, + .optimize_function = dml2_top_optimization_optimize_function_mcache, + .optimized_display_config = &l->optimized_display_config_with_meta, + .all_or_nothing = false, + }; + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &mcache_phase); + } + + /* + * Call DPMM to map all requirements to minimum clock state + */ + if (result) { + l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; + l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_mode_params.programming = dpmm_programming; + l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; + l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; + result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); + } + + in_out->is_supported = mcache_success; + result = 
result && in_out->is_supported; + + return result; +} + +static bool dml2_top_soc15_build_mode_programming(struct dml2_build_mode_programming_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_build_mode_programming_locals *l = &dml->scratch.build_mode_programming_locals; + + bool result = false; + bool mcache_success = false; + bool uclk_pstate_success = false; + bool vmin_success = false; + bool stutter_success = false; + unsigned int i; + + memset(l, 0, sizeof(struct dml2_build_mode_programming_locals)); + memset(in_out->programming, 0, sizeof(struct dml2_display_cfg_programming)); + + memcpy(&in_out->programming->display_config, in_out->display_config, sizeof(struct dml2_display_cfg)); + + setup_speculative_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + setup_unoptimized_display_config_with_meta(dml, &l->base_display_config_with_meta, in_out->display_config); + + l->mode_support_params.instance = &dml->core_instance; + l->mode_support_params.display_cfg = &l->base_display_config_with_meta; + l->mode_support_params.min_clk_table = &dml->min_clk_table; + l->mode_support_params.min_clk_index = l->base_display_config_with_meta.stage1.min_clk_index_for_latency; + result = dml->core_instance.mode_support(&l->mode_support_params); + l->base_display_config_with_meta.mode_support_result = l->mode_support_params.mode_support_result; + + if (!result) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + dml->core_instance.populate_informative(&l->informative_params); + + return false; + } + + /* + * Phase 1: Determine minimum clocks to satisfy latency requirements for this mode + */ + memset(&l->min_clock_for_latency_phase, 0, sizeof(struct optimization_phase_params)); + l->min_clock_for_latency_phase.dml = dml; + l->min_clock_for_latency_phase.display_config = &l->base_display_config_with_meta; + l->min_clock_for_latency_phase.init_function = dml2_top_optimization_init_function_min_clk_for_latency; + l->min_clock_for_latency_phase.test_function = dml2_top_optimization_test_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimize_function = dml2_top_optimization_optimize_function_min_clk_for_latency; + l->min_clock_for_latency_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->min_clock_for_latency_phase.all_or_nothing = false; + + dml2_top_optimization_perform_optimization_phase_1(&l->optimization_phase_locals, &l->min_clock_for_latency_phase); + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + } + + /* + * Phase 2: Satisfy DCC mcache requirements + */ + memset(&l->mcache_phase, 0, sizeof(struct optimization_phase_params)); + l->mcache_phase.dml = dml; + l->mcache_phase.display_config = &l->base_display_config_with_meta; + l->mcache_phase.test_function = 
dml2_top_optimization_test_function_mcache; + l->mcache_phase.optimize_function = dml2_top_optimization_optimize_function_mcache; + l->mcache_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->mcache_phase.all_or_nothing = true; + + mcache_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->mcache_phase); + + if (!mcache_success) { + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = false; + + dml->core_instance.populate_informative(&l->informative_params); + + in_out->programming->informative.failed_mcache_validation = true; + return false; + } + + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + + /* + * Phase 3: Optimize for Pstate + */ + memset(&l->uclk_pstate_phase, 0, sizeof(struct optimization_phase_params)); + l->uclk_pstate_phase.dml = dml; + l->uclk_pstate_phase.display_config = &l->base_display_config_with_meta; + l->uclk_pstate_phase.init_function = dml2_top_optimization_init_function_uclk_pstate; + l->uclk_pstate_phase.test_function = dml2_top_optimization_test_function_uclk_pstate; + l->uclk_pstate_phase.optimize_function = dml2_top_optimization_optimize_function_uclk_pstate; + l->uclk_pstate_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->uclk_pstate_phase.all_or_nothing = true; + + uclk_pstate_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->uclk_pstate_phase); + + if (uclk_pstate_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage3.success = true; + } + + /* + * Phase 4: Optimize for Vmin + */ + memset(&l->vmin_phase, 0, sizeof(struct optimization_phase_params)); + l->vmin_phase.dml = dml; + l->vmin_phase.display_config = &l->base_display_config_with_meta; + l->vmin_phase.init_function = dml2_top_optimization_init_function_vmin; + l->vmin_phase.test_function = dml2_top_optimization_test_function_vmin; + l->vmin_phase.optimize_function = dml2_top_optimization_optimize_function_vmin; + l->vmin_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->vmin_phase.all_or_nothing = false; + + vmin_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->vmin_phase); + + if (l->optimized_display_config_with_meta.stage4.performed) { + /* + * when performed is true, optimization has applied to + * optimized_display_config_with_meta and it has passed mode + * support. However it may or may not pass the test function to + * reach actual Vmin. As long as voltage is optimized even if it + * doesn't reach Vmin level, there is still power benefit so in + * this case we will still copy this optimization into base + * display config. 
+ */ + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage4.success = vmin_success; + } + + /* + * Phase 5: Optimize for Stutter + */ + memset(&l->stutter_phase, 0, sizeof(struct optimization_phase_params)); + l->stutter_phase.dml = dml; + l->stutter_phase.display_config = &l->base_display_config_with_meta; + l->stutter_phase.init_function = dml2_top_optimization_init_function_stutter; + l->stutter_phase.test_function = dml2_top_optimization_test_function_stutter; + l->stutter_phase.optimize_function = dml2_top_optimization_optimize_function_stutter; + l->stutter_phase.optimized_display_config = &l->optimized_display_config_with_meta; + l->stutter_phase.all_or_nothing = true; + + stutter_success = dml2_top_optimization_perform_optimization_phase(&l->optimization_phase_locals, &l->stutter_phase); + + if (stutter_success) { + memcpy(&l->base_display_config_with_meta, &l->optimized_display_config_with_meta, sizeof(struct display_configuation_with_meta)); + l->base_display_config_with_meta.stage5.success = true; + } + + /* + * Populate mcache programming + */ + for (i = 0; i < in_out->display_config->num_planes; i++) { + in_out->programming->plane_programming[i].mcache_allocation = l->base_display_config_with_meta.stage2.mcache_allocations[i]; + } + + /* + * Call DPMM to map all requirements to minimum clock state + */ + if (result) { + l->dppm_map_mode_params.min_clk_table = &dml->min_clk_table; + l->dppm_map_mode_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_mode_params.programming = in_out->programming; + l->dppm_map_mode_params.soc_bb = &dml->soc_bbox; + l->dppm_map_mode_params.ip = &dml->core_instance.clean_me_up.mode_lib.ip; + result = dml->dpmm_instance.map_mode_to_soc_dpm(&l->dppm_map_mode_params); + if (!result) + in_out->programming->informative.failed_dpmm = true; + } + + if (result) { + l->mode_programming_params.instance = &dml->core_instance; + l->mode_programming_params.display_cfg = &l->base_display_config_with_meta; + l->mode_programming_params.cfg_support_info = &l->base_display_config_with_meta.mode_support_result.cfg_support_info; + l->mode_programming_params.programming = in_out->programming; + result = dml->core_instance.mode_programming(&l->mode_programming_params); + if (!result) + in_out->programming->informative.failed_mode_programming = true; + } + + if (result) { + l->dppm_map_watermarks_params.core = &dml->core_instance; + l->dppm_map_watermarks_params.display_cfg = &l->base_display_config_with_meta; + l->dppm_map_watermarks_params.programming = in_out->programming; + result = dml->dpmm_instance.map_watermarks(&l->dppm_map_watermarks_params); + } + + l->informative_params.instance = &dml->core_instance; + l->informative_params.programming = in_out->programming; + l->informative_params.mode_is_supported = result; + + dml->core_instance.populate_informative(&l->informative_params); + + return result; +} + +bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params) +{ + bool success = true; + int config_index, pipe_index; + int first_offset, second_offset; + int free_per_plane_reg_index = 0; + + memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *)); + + for (config_index = 0; config_index < params->num_configurations; config_index++) { + for (pipe_index = 0; pipe_index < 
params->mcache_configurations[config_index].num_pipes; pipe_index++) { + // Allocate storage for the mcache regs + params->per_plane_pipe_mcache_regs[config_index][pipe_index] = ¶ms->mcache_regs_set[free_per_plane_reg_index++]; + + reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]); + + if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) { + // P0 always enabled + if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0, + params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0, + 0, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start + + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1, + &first_offset, &second_offset)) { + success = false; + break; + } + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset]; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset]; + + if (second_offset >= 0) { + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; + } + + // Populate P1 if enabled + if (params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) { + if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1, + params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1, + 0, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start, + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start + + params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1, + &first_offset, &second_offset)) { + success = false; + break; + } + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset]; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset]; + + if (second_offset >= 0) { + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = + 
params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = + params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset]; + params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = + params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; + } + } + } + } + } + + return success; +} + +static const struct dml2_top_funcs soc15_funcs = { + .check_mode_supported = dml2_top_soc15_check_mode_supported, + .build_mode_programming = dml2_top_soc15_build_mode_programming, + .build_mcache_programming = dml2_top_soc15_build_mcache_programming, +}; + +bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out) +{ + struct dml2_instance *dml = (struct dml2_instance *)in_out->dml2_instance; + struct dml2_initialize_instance_locals *l = &dml->scratch.initialize_instance_locals; + struct dml2_core_initialize_in_out core_init_params = { 0 }; + struct dml2_mcg_build_min_clock_table_params_in_out mcg_build_min_clk_params = { 0 }; + struct dml2_pmo_initialize_in_out pmo_init_params = { 0 }; + bool result = false; + + memset(l, 0, sizeof(struct dml2_initialize_instance_locals)); + memset(dml, 0, sizeof(struct dml2_instance)); + + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + memcpy(&dml->soc_bbox, &in_out->soc_bb, sizeof(struct dml2_soc_bb)); + + dml->project_id = in_out->options.project_id; + dml->pmo_options = in_out->options.pmo_options; + + // Initialize All Components + result = dml2_mcg_create(in_out->options.project_id, &dml->mcg_instance); + + if (result) + result = dml2_dpmm_create(in_out->options.project_id, &dml->dpmm_instance); + + if (result) + result = dml2_core_create(in_out->options.project_id, &dml->core_instance); + + if (result) { + mcg_build_min_clk_params.soc_bb = &in_out->soc_bb; + mcg_build_min_clk_params.min_clk_table = &dml->min_clk_table; + result = dml->mcg_instance.build_min_clock_table(&mcg_build_min_clk_params); + } + + if (result) { + core_init_params.project_id = in_out->options.project_id; + core_init_params.instance = &dml->core_instance; + core_init_params.minimum_clock_table = &dml->min_clk_table; + core_init_params.explicit_ip_bb = in_out->overrides.explicit_ip_bb; + core_init_params.explicit_ip_bb_size = in_out->overrides.explicit_ip_bb_size; + core_init_params.ip_caps = &in_out->ip_caps; + core_init_params.soc_bb = &in_out->soc_bb; + result = dml->core_instance.initialize(&core_init_params); + + if (core_init_params.explicit_ip_bb && core_init_params.explicit_ip_bb_size > 0) { + memcpy(&dml->ip_caps, &in_out->ip_caps, sizeof(struct dml2_ip_capabilities)); + } + } + + if (result) + result = dml2_pmo_create(in_out->options.project_id, &dml->pmo_instance); + + if (result) { + pmo_init_params.instance = &dml->pmo_instance; + pmo_init_params.soc_bb = &dml->soc_bbox; + pmo_init_params.ip_caps = &dml->ip_caps; + pmo_init_params.mcg_clock_table_size = dml->min_clk_table.dram_bw_table.num_entries; + pmo_init_params.options = &dml->pmo_options; + dml->pmo_instance.initialize(&pmo_init_params); + } + dml->funcs = soc15_funcs; + return result; +} diff --git 
a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h index 7b1f6f7143d0..6fda201af898 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml2_top_soc15.h @@ -1,23 +1,13 @@ // SPDX-License-Identifier: MIT // // Copyright 2024 Advanced Micro Devices, Inc. - -#ifndef __DML_TOP_MCACHE_H__ -#define __DML_TOP_MCACHE_H__ - -#include "dml2_external_lib_deps.h" -#include "dml_top_display_cfg_types.h" -#include "dml_top_types.h" +#ifndef __DML2_TOP_SOC15_H__ +#define __DML2_TOP_SOC15_H__ #include "dml2_internal_shared_types.h" +bool dml2_top_soc15_initialize_instance(struct dml2_initialize_instance_in_out *in_out); bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params); - void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params); - bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params); - -bool dml2_top_mcache_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params); - -bool dml2_top_mcache_unit_test(void); - -#endif +bool dml2_top_soc15_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params); +#endif /* __DML2_TOP_SOC15_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c deleted file mode 100644 index a342ebfbe4e7..000000000000 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/dml2_top/dml_top_mcache.c +++ /dev/null @@ -1,549 +0,0 @@ -// SPDX-License-Identifier: MIT -// -// Copyright 2024 Advanced Micro Devices, Inc. - -#include "dml2_debug.h" - -#include "dml_top_mcache.h" -#include "lib_float_math.h" - -#include "dml2_internal_shared_types.h" - -/* -* Takes an input set of mcache boundaries and finds the appropriate setting of cache programming. -* Returns true if a valid set of programming can be made, and false otherwise. "Valid" means -* that the horizontal viewport does not span more than 2 cache slices. -* -* It optionally also can apply a constant shift to all the cache boundaries. 
-*/ -static const uint32_t MCACHE_ID_UNASSIGNED = 0xF; -static const uint32_t SPLIT_LOCATION_UNDEFINED = 0xFFFF; - -static bool calculate_first_second_splitting(const int *mcache_boundaries, int num_boundaries, int shift, - int pipe_h_vp_start, int pipe_h_vp_end, int *first_offset, int *second_offset) -{ - const int MAX_VP = 0xFFFFFF; - int left_cache_id; - int right_cache_id; - int range_start; - int range_end; - bool success = false; - - if (num_boundaries <= 1) { - if (first_offset && second_offset) { - *first_offset = 0; - *second_offset = -1; - } - success = true; - return success; - } else { - range_start = 0; - for (left_cache_id = 0; left_cache_id < num_boundaries; left_cache_id++) { - range_end = mcache_boundaries[left_cache_id] - shift - 1; - - if (range_start <= pipe_h_vp_start && pipe_h_vp_start <= range_end) - break; - - range_start = range_end + 1; - } - - range_end = MAX_VP; - for (right_cache_id = num_boundaries - 1; right_cache_id >= -1; right_cache_id--) { - if (right_cache_id >= 0) - range_start = mcache_boundaries[right_cache_id] - shift; - else - range_start = 0; - - if (range_start <= pipe_h_vp_end && pipe_h_vp_end <= range_end) { - break; - } - range_end = range_start - 1; - } - right_cache_id = (right_cache_id + 1) % num_boundaries; - - if (right_cache_id == left_cache_id) { - if (first_offset && second_offset) { - *first_offset = left_cache_id; - *second_offset = -1; - } - success = true; - } else if (right_cache_id == (left_cache_id + 1) % num_boundaries) { - if (first_offset && second_offset) { - *first_offset = left_cache_id; - *second_offset = right_cache_id; - } - success = true; - } - } - - return success; -} - -/* -* For a given set of pipe start/end x positions, checks to see it can support the input mcache splitting. -* It also attempts to "optimize" by finding a shift if the default 0 shift does not work. -*/ -static bool find_shift_for_valid_cache_id_assignment(int *mcache_boundaries, unsigned int num_boundaries, - int *pipe_vp_startx, int *pipe_vp_endx, unsigned int pipe_count, int shift_granularity, int *shift) -{ - int max_shift = 0xFFFF; - unsigned int pipe_index; - unsigned int i, slice_width; - bool success = false; - - for (i = 0; i < num_boundaries; i++) { - if (i == 0) - slice_width = mcache_boundaries[i]; - else - slice_width = mcache_boundaries[i] - mcache_boundaries[i - 1]; - - if (max_shift > (int)slice_width) { - max_shift = slice_width; - } - } - - for (*shift = 0; *shift <= max_shift; *shift += shift_granularity) { - success = true; - for (pipe_index = 0; pipe_index < pipe_count; pipe_index++) { - if (!calculate_first_second_splitting(mcache_boundaries, num_boundaries, *shift, - pipe_vp_startx[pipe_index], pipe_vp_endx[pipe_index], 0, 0)) { - success = false; - break; - } - } - if (success) - break; - } - - return success; -} - -/* -* Counts the number of elements inside input array within the given span length. -* Formally, what is the size of the largest subset of the array where the largest and smallest element -* differ no more than the span. -*/ -static unsigned int count_elements_in_span(int *array, unsigned int array_size, unsigned int span) -{ - unsigned int i; - unsigned int span_start_value; - unsigned int span_start_index; - unsigned int greatest_element_count; - - if (array_size == 0) - return 1; - - if (span == 0) - return array_size > 0 ? 
1 : 0; - - span_start_value = 0; - span_start_index = 0; - greatest_element_count = 0; - - while (span_start_index < array_size) { - for (i = span_start_index; i < array_size; i++) { - if (array[i] - span_start_value <= span) { - if (i - span_start_index + 1 > greatest_element_count) { - greatest_element_count = i - span_start_index + 1; - } - } else - break; - } - - span_start_index++; - - if (span_start_index < array_size) { - span_start_value = array[span_start_index - 1] + 1; - } - } - - return greatest_element_count; -} - -static bool calculate_h_split_for_scaling_transform(int full_vp_width, int h_active, int num_pipes, - enum dml2_scaling_transform scaling_transform, int *pipe_vp_x_start, int *pipe_vp_x_end) -{ - int i, slice_width; - const char MAX_SCL_VP_OVERLAP = 3; - bool success = false; - - switch (scaling_transform) { - case dml2_scaling_transform_centered: - case dml2_scaling_transform_aspect_ratio: - case dml2_scaling_transform_fullscreen: - slice_width = full_vp_width / num_pipes; - for (i = 0; i < num_pipes; i++) { - pipe_vp_x_start[i] = i * slice_width; - pipe_vp_x_end[i] = (i + 1) * slice_width - 1; - - if (pipe_vp_x_start[i] < MAX_SCL_VP_OVERLAP) - pipe_vp_x_start[i] = 0; - else - pipe_vp_x_start[i] -= MAX_SCL_VP_OVERLAP; - - if (pipe_vp_x_end[i] > full_vp_width - MAX_SCL_VP_OVERLAP - 1) - pipe_vp_x_end[i] = full_vp_width - 1; - else - pipe_vp_x_end[i] += MAX_SCL_VP_OVERLAP; - } - break; - case dml2_scaling_transform_explicit: - default: - success = false; - break; - } - - return success; -} - -bool dml2_top_mcache_validate_admissability(struct top_mcache_validate_admissability_in_out *params) -{ - struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; - struct dml2_top_mcache_validate_admissability_locals *l = &dml->scratch.mcache_validate_admissability_locals; - - const int MAX_PIXEL_OVERLAP = 6; - int max_per_pipe_vp_p0 = 0; - int max_per_pipe_vp_p1 = 0; - int temp, p0shift, p1shift; - unsigned int plane_index = 0; - unsigned int i; - unsigned int odm_combine_factor; - unsigned int mpc_combine_factor; - unsigned int num_dpps; - unsigned int num_boundaries; - enum dml2_scaling_transform scaling_transform; - const struct dml2_plane_parameters *plane; - const struct dml2_stream_parameters *stream; - - bool p0pass = false; - bool p1pass = false; - bool all_pass = true; - - for (plane_index = 0; plane_index < params->display_cfg->num_planes; plane_index++) { - if (!params->display_cfg->plane_descriptors[plane_index].surface.dcc.enable) - continue; - - plane = ¶ms->display_cfg->plane_descriptors[plane_index]; - stream = ¶ms->display_cfg->stream_descriptors[plane->stream_index]; - - num_dpps = odm_combine_factor = params->cfg_support_info->stream_support_info[plane->stream_index].odms_used; - - if (odm_combine_factor == 1) - num_dpps = mpc_combine_factor = (unsigned int)params->cfg_support_info->plane_support_info[plane_index].dpps_used; - else - mpc_combine_factor = 1; - - if (odm_combine_factor > 1) { - max_per_pipe_vp_p0 = plane->surface.plane0.width; - temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane0.h_ratio * stream->timing.h_active / odm_combine_factor); - - if (temp < max_per_pipe_vp_p0) - max_per_pipe_vp_p0 = temp; - - max_per_pipe_vp_p1 = plane->surface.plane1.width; - temp = (unsigned int)math_ceil(plane->composition.scaler_info.plane1.h_ratio * stream->timing.h_active / odm_combine_factor); - - if (temp < max_per_pipe_vp_p1) - max_per_pipe_vp_p1 = temp; - } else { - max_per_pipe_vp_p0 = plane->surface.plane0.width / 
mpc_combine_factor; - max_per_pipe_vp_p1 = plane->surface.plane1.width / mpc_combine_factor; - } - - max_per_pipe_vp_p0 += 2 * MAX_PIXEL_OVERLAP; - max_per_pipe_vp_p1 += MAX_PIXEL_OVERLAP; - - p0shift = 0; - p1shift = 0; - - // The last element in the unshifted boundary array will always be the first pixel outside the - // plane, which means theres no mcache associated with it, so -1 - num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane0 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane0 - 1; - if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - num_boundaries, max_per_pipe_vp_p0) <= 1) && (num_boundaries <= num_dpps)) { - p0pass = true; - } - num_boundaries = params->mcache_allocations[plane_index].num_mcaches_plane1 == 0 ? 0 : params->mcache_allocations[plane_index].num_mcaches_plane1 - 1; - if ((count_elements_in_span(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - num_boundaries, max_per_pipe_vp_p1) <= 1) && (num_boundaries <= num_dpps)) { - p1pass = true; - } - - if (!p0pass || !p1pass) { - if (odm_combine_factor > 1) { - num_dpps = odm_combine_factor; - scaling_transform = plane->composition.scaling_transform; - } else { - num_dpps = mpc_combine_factor; - scaling_transform = dml2_scaling_transform_fullscreen; - } - - if (!p0pass) { - if (plane->composition.viewport.stationary) { - calculate_h_split_for_scaling_transform(plane->surface.plane0.width, - stream->timing.h_active, num_dpps, scaling_transform, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); - p0pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane0, - params->mcache_allocations[plane_index].num_mcaches_plane0, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index], num_dpps, - params->mcache_allocations[plane_index].shift_granularity.p0, &p0shift); - } - } - if (!p1pass) { - if (plane->composition.viewport.stationary) { - calculate_h_split_for_scaling_transform(plane->surface.plane1.width, - stream->timing.h_active, num_dpps, scaling_transform, - &l->plane0.pipe_vp_startx[plane_index], &l->plane0.pipe_vp_endx[plane_index]); - p1pass = find_shift_for_valid_cache_id_assignment(params->mcache_allocations[plane_index].mcache_x_offsets_plane1, - params->mcache_allocations[plane_index].num_mcaches_plane1, - &l->plane1.pipe_vp_startx[plane_index], &l->plane1.pipe_vp_endx[plane_index], num_dpps, - params->mcache_allocations[plane_index].shift_granularity.p1, &p1shift); - } - } - } - - if (p0pass && p1pass) { - for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane0; i++) { - params->mcache_allocations[plane_index].mcache_x_offsets_plane0[i] -= p0shift; - } - for (i = 0; i < params->mcache_allocations[plane_index].num_mcaches_plane1; i++) { - params->mcache_allocations[plane_index].mcache_x_offsets_plane1[i] -= p1shift; - } - } - - params->per_plane_status[plane_index] = p0pass && p1pass; - all_pass &= p0pass && p1pass; - } - - return all_pass; -} - -static void reset_mcache_allocations(struct dml2_hubp_pipe_mcache_regs *per_plane_pipe_mcache_regs) -{ - // Initialize all entries to special valid MCache ID and special valid split coordinate - per_plane_pipe_mcache_regs->main.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p0.split_location = SPLIT_LOCATION_UNDEFINED; - - 
per_plane_pipe_mcache_regs->mall.p0.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p0.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p0.split_location = SPLIT_LOCATION_UNDEFINED; - - per_plane_pipe_mcache_regs->main.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->main.p1.split_location = SPLIT_LOCATION_UNDEFINED; - - per_plane_pipe_mcache_regs->mall.p1.mcache_id_first = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p1.mcache_id_second = MCACHE_ID_UNASSIGNED; - per_plane_pipe_mcache_regs->mall.p1.split_location = SPLIT_LOCATION_UNDEFINED; -} - -bool dml2_top_mcache_build_mcache_programming(struct dml2_build_mcache_programming_in_out *params) -{ - bool success = true; - int config_index, pipe_index; - int first_offset, second_offset; - int free_per_plane_reg_index = 0; - - memset(params->per_plane_pipe_mcache_regs, 0, DML2_MAX_PLANES * DML2_MAX_DCN_PIPES * sizeof(struct dml2_hubp_pipe_mcache_regs *)); - - for (config_index = 0; config_index < params->num_configurations; config_index++) { - for (pipe_index = 0; pipe_index < params->mcache_configurations[config_index].num_pipes; pipe_index++) { - // Allocate storage for the mcache regs - params->per_plane_pipe_mcache_regs[config_index][pipe_index] = ¶ms->mcache_regs_set[free_per_plane_reg_index++]; - - reset_mcache_allocations(params->per_plane_pipe_mcache_regs[config_index][pipe_index]); - - if (params->mcache_configurations[config_index].plane_descriptor->surface.dcc.enable) { - // P0 always enabled - if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0, - params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane0, - 0, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_x_start + - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane0.viewport_width - 1, - &first_offset, &second_offset)) { - success = false; - break; - } - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[first_offset]; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[first_offset]; - - if (second_offset >= 0) { - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane0[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p0.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane0[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p0.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane0[first_offset] - 1; - } - - // Populate P1 if enabled - if 
(params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1_enabled) { - if (!calculate_first_second_splitting(params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1, - params->mcache_configurations[config_index].mcache_allocation->num_mcaches_plane1, - 0, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start, - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_x_start + - params->mcache_configurations[config_index].pipe_configurations[pipe_index].plane1.viewport_width - 1, - &first_offset, &second_offset)) { - success = false; - break; - } - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[first_offset]; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_first = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[first_offset]; - - if (second_offset >= 0) { - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_plane1[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->main.p1.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; - - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.mcache_id_second = - params->mcache_configurations[config_index].mcache_allocation->global_mcache_ids_mall_plane1[second_offset]; - params->per_plane_pipe_mcache_regs[config_index][pipe_index]->mall.p1.split_location = - params->mcache_configurations[config_index].mcache_allocation->mcache_x_offsets_plane1[first_offset] - 1; - } - } - } - } - } - - return success; -} - -void dml2_top_mcache_assign_global_mcache_ids(struct top_mcache_assign_global_mcache_ids_in_out *params) -{ - int i; - unsigned int j; - int next_unused_cache_id = 0; - - for (i = 0; i < params->num_allocations; i++) { - if (!params->allocations[i].valid) - continue; - - for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { - params->allocations[i].global_mcache_ids_plane0[j] = next_unused_cache_id++; - } - for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { - params->allocations[i].global_mcache_ids_plane1[j] = next_unused_cache_id++; - } - - // The "psuedo-last" slice is always wrapped around - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0] = - params->allocations[i].global_mcache_ids_plane0[0]; - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1] = - params->allocations[i].global_mcache_ids_plane1[0]; - - // If we need dedicated caches for mall requesting, then we assign them here. 
- if (params->allocations[i].requires_dedicated_mall_mcache) { - for (j = 0; j < params->allocations[i].num_mcaches_plane0; j++) { - params->allocations[i].global_mcache_ids_mall_plane0[j] = next_unused_cache_id++; - } - for (j = 0; j < params->allocations[i].num_mcaches_plane1; j++) { - params->allocations[i].global_mcache_ids_mall_plane1[j] = next_unused_cache_id++; - } - - // The "psuedo-last" slice is always wrapped around - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0] = - params->allocations[i].global_mcache_ids_mall_plane0[0]; - params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1] = - params->allocations[i].global_mcache_ids_mall_plane1[0]; - } - - // If P0 and P1 are sharing caches, then it means the largest mcache IDs for p0 and p1 can be the same - // since mcache IDs are always ascending, then it means the largest mcacheID of p1 should be the - // largest mcacheID of P0 - if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && - params->allocations[i].last_slice_sharing.plane0_plane1) { - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - - // If we need dedicated caches handle last slice sharing - if (params->allocations[i].requires_dedicated_mall_mcache) { - if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].num_mcaches_plane1 > 0 && - params->allocations[i].last_slice_sharing.plane0_plane1) { - params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - // If mall_comb_mcache_l is set then it means that largest mcache ID for MALL p0 can be same as regular read p0 - if (params->allocations[i].num_mcaches_plane0 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p0) { - params->allocations[i].global_mcache_ids_mall_plane0[params->allocations[i].num_mcaches_plane0 - 1] = - params->allocations[i].global_mcache_ids_plane0[params->allocations[i].num_mcaches_plane0 - 1]; - } - // If mall_comb_mcache_c is set then it means that largest mcache ID for MALL p1 can be same as regular - // read p1 (which can be same as regular read p0 if plane0_plane1 is also set) - if (params->allocations[i].num_mcaches_plane1 > 0 && params->allocations[i].last_slice_sharing.mall_comb_mcache_p1) { - params->allocations[i].global_mcache_ids_mall_plane1[params->allocations[i].num_mcaches_plane1 - 1] = - params->allocations[i].global_mcache_ids_plane1[params->allocations[i].num_mcaches_plane1 - 1]; - } - } - - // If you don't need dedicated mall mcaches, the mall mcache assignments are identical to the normal requesting - if (!params->allocations[i].requires_dedicated_mall_mcache) { - memcpy(params->allocations[i].global_mcache_ids_mall_plane0, params->allocations[i].global_mcache_ids_plane0, - sizeof(params->allocations[i].global_mcache_ids_mall_plane0)); - memcpy(params->allocations[i].global_mcache_ids_mall_plane1, params->allocations[i].global_mcache_ids_plane1, - sizeof(params->allocations[i].global_mcache_ids_mall_plane1)); - } - } -} - -bool dml2_top_mcache_calc_mcache_count_and_offsets(struct top_mcache_calc_mcache_count_and_offsets_in_out *params) -{ - struct dml2_instance *dml = (struct dml2_instance *)params->dml2_instance; - 
struct dml2_top_mcache_verify_mcache_size_locals *l = &dml->scratch.mcache_verify_mcache_size_locals; - - unsigned int total_mcaches_required; - unsigned int i; - bool result = false; - - if (dml->soc_bbox.num_dcc_mcaches == 0) { - return true; - } - - total_mcaches_required = 0; - l->calc_mcache_params.instance = &dml->core_instance; - for (i = 0; i < params->display_config->num_planes; i++) { - if (!params->display_config->plane_descriptors[i].surface.dcc.enable) { - memset(¶ms->mcache_allocations[i], 0, sizeof(struct dml2_mcache_surface_allocation)); - continue; - } - - l->calc_mcache_params.plane_descriptor = ¶ms->display_config->plane_descriptors[i]; - l->calc_mcache_params.mcache_allocation = ¶ms->mcache_allocations[i]; - l->calc_mcache_params.plane_index = i; - - if (!dml->core_instance.calculate_mcache_allocation(&l->calc_mcache_params)) { - result = false; - break; - } - - if (params->mcache_allocations[i].valid) { - total_mcaches_required += params->mcache_allocations[i].num_mcaches_plane0 + params->mcache_allocations[i].num_mcaches_plane1; - if (params->mcache_allocations[i].last_slice_sharing.plane0_plane1) - total_mcaches_required--; - } - } - dml2_printf("DML_CORE_DCN3::%s: plane_%d, total_mcaches_required=%d\n", __func__, i, total_mcaches_required); - - if (total_mcaches_required > dml->soc_bbox.num_dcc_mcaches) { - result = false; - } else { - result = true; - } - - return result; -} diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c index e9b8e10695ae..f95c7ff56f15 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.c @@ -4,6 +4,11 @@ #include "dml2_debug.h" +int dml2_log_internal(const char *format, ...) +{ + return 0; +} + int dml2_printf(const char *format, ...) { #ifdef _DEBUG diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h index d51a1b6c62f2..a27792b56f7e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_debug.h @@ -8,9 +8,53 @@ #ifdef _DEBUG #define DML2_ASSERT(condition) dml2_assert(condition) #else -#define DML2_ASSERT(condition) +#define DML2_ASSERT(condition) ((void)0) +#endif +/* + * DML_LOG_FATAL - fatal errors for unrecoverable DML states until a restart. + * DML_LOG_ERROR - unexpected but recoverable failures inside DML + * DML_LOG_WARN - unexpected inputs or events to DML + * DML_LOG_INFO - high level tracing of DML interfaces + * DML_LOG_DEBUG - detailed tracing of DML internal components + * DML_LOG_VERBOSE - detailed tracing of DML calculation procedure + */ +#if !defined(DML_LOG_LEVEL) +#if defined(_DEBUG) && defined(_DEBUG_PRINTS) +/* for backward compatibility with old macros */ +#define DML_LOG_LEVEL 5 +#else +#define DML_LOG_LEVEL 0 +#endif +#endif + +#define DML_LOG_FATAL(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#if DML_LOG_LEVEL >= 1 +#define DML_LOG_ERROR(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_ERROR(fmt, ...) ((void)0) +#endif +#if DML_LOG_LEVEL >= 2 +#define DML_LOG_WARN(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_WARN(fmt, ...) ((void)0) +#endif +#if DML_LOG_LEVEL >= 3 +#define DML_LOG_INFO(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_INFO(fmt, ...) 
((void)0) +#endif +#if DML_LOG_LEVEL >= 4 +#define DML_LOG_DEBUG(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_DEBUG(fmt, ...) ((void)0) +#endif +#if DML_LOG_LEVEL >= 5 +#define DML_LOG_VERBOSE(fmt, ...) dml2_log_internal(fmt, ## __VA_ARGS__) +#else +#define DML_LOG_VERBOSE(fmt, ...) ((void)0) #endif +int dml2_log_internal(const char *format, ...); int dml2_printf(const char *format, ...); void dml2_assert(int condition); diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h index aeac9f159fa5..d94b310d6eec 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/src/inc/dml2_internal_shared_types.h @@ -8,7 +8,6 @@ #include "dml2_external_lib_deps.h" #include "dml_top_types.h" #include "dml2_core_shared_types.h" - /* * DML2 MCG Types and Interfaces */ @@ -63,7 +62,6 @@ struct dml2_mcg_build_min_clock_table_params_in_out { */ struct dml2_mcg_min_clock_table *min_clk_table; }; - struct dml2_mcg_instance { bool (*build_min_clock_table)(struct dml2_mcg_build_min_clock_table_params_in_out *in_out); bool (*unit_test)(void); @@ -81,7 +79,6 @@ struct dml2_dpmm_map_mode_to_soc_dpm_params_in_out { struct dml2_soc_bb *soc_bb; struct dml2_mcg_min_clock_table *min_clk_table; const struct display_configuation_with_meta *display_cfg; - struct { bool perform_pseudo_map; struct dml2_core_internal_soc_bb *soc_bb; @@ -309,7 +306,7 @@ struct dml2_optimization_stage3_state { // The pstate support mode for each plane // The number of valid elements == display_cfg.num_planes // The indexing of pstate_switch_modes matches plane_descriptors[] - enum dml2_uclk_pstate_support_method pstate_switch_modes[DML2_MAX_PLANES]; + enum dml2_pstate_method pstate_switch_modes[DML2_MAX_PLANES]; // Meta-data for implicit SVP generation, indexed by stream index struct dml2_implicit_svp_meta stream_svp_meta[DML2_MAX_PLANES]; @@ -356,6 +353,12 @@ struct display_configuation_with_meta { struct dml2_optimization_stage5_state stage5; }; +struct dml2_pmo_pstate_strategy { + enum dml2_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; + bool allow_state_increase; +}; + + struct dml2_core_mode_support_in_out { /* * Inputs @@ -365,7 +368,6 @@ struct dml2_core_mode_support_in_out { struct dml2_mcg_min_clock_table *min_clk_table; int min_clk_index; - /* * Outputs */ @@ -395,7 +397,6 @@ struct dml2_core_mode_programming_in_out { struct dml2_core_instance *instance; const struct display_configuation_with_meta *display_cfg; const struct core_display_cfg_support_info *cfg_support_info; - /* * Outputs (also Input the clk freq are also from programming struct) */ @@ -445,6 +446,7 @@ struct dml2_core_internal_state_intermediates { struct dml2_core_mode_support_locals { struct dml2_core_calcs_mode_support_ex mode_support_ex_params; struct dml2_display_cfg svp_expanded_display_cfg; + struct dml2_calculate_mcache_allocation_in_out calc_mcache_allocation_params; }; struct dml2_core_mode_programming_locals { @@ -600,34 +602,11 @@ struct dml2_pmo_optimize_for_stutter_in_out { struct display_configuation_with_meta *optimized_display_config; }; -enum dml2_pmo_pstate_method { - dml2_pmo_pstate_strategy_na = 0, - /* hw exclusive modes */ - dml2_pmo_pstate_strategy_vactive = 1, - dml2_pmo_pstate_strategy_vblank = 2, - dml2_pmo_pstate_strategy_reserved_hw = 5, - /* fw assisted exclusive modes */ - dml2_pmo_pstate_strategy_fw_svp 
= 6, - dml2_pmo_pstate_strategy_reserved_fw = 10, - /* fw assisted modes requiring drr modulation */ - dml2_pmo_pstate_strategy_fw_vactive_drr = 11, - dml2_pmo_pstate_strategy_fw_vblank_drr = 12, - dml2_pmo_pstate_strategy_fw_svp_drr = 13, - dml2_pmo_pstate_strategy_reserved_fw_drr_clamped = 20, - dml2_pmo_pstate_strategy_fw_drr = 21, - dml2_pmo_pstate_strategy_reserved_fw_drr_var = 22, -}; - -struct dml2_pmo_pstate_strategy { - enum dml2_pmo_pstate_method per_stream_pstate_method[DML2_MAX_PLANES]; - bool allow_state_increase; -}; - -#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw - dml2_pmo_pstate_strategy_na + 1)) - 1) << dml2_pmo_pstate_strategy_na) -#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) -#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_clamped - dml2_pmo_pstate_strategy_fw_vactive_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_vactive_drr) -#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_drr + 1)) - 1) << dml2_pmo_pstate_strategy_fw_drr) -#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pmo_pstate_strategy_reserved_fw_drr_var - dml2_pmo_pstate_strategy_fw_svp + 1)) - 1) << dml2_pmo_pstate_strategy_fw_svp) +#define PMO_NO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw - dml2_pstate_method_na + 1)) - 1) << dml2_pstate_method_na) +#define PMO_DRR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) +#define PMO_DRR_CLAMPED_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_clamped - dml2_pstate_method_fw_vactive_drr + 1)) - 1) << dml2_pstate_method_fw_vactive_drr) +#define PMO_DRR_VAR_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_drr + 1)) - 1) << dml2_pstate_method_fw_drr) +#define PMO_FW_STRATEGY_MASK (((1 << (dml2_pstate_method_reserved_fw_drr_var - dml2_pstate_method_fw_svp + 1)) - 1) << dml2_pstate_method_fw_svp) #define PMO_DCN4_MAX_DISPLAYS 4 #define PMO_DCN4_MAX_NUM_VARIANTS 2 @@ -645,6 +624,8 @@ struct dml2_pmo_scratch { int stream_mask; } pmo_dcn3; struct { + struct dml2_pmo_pstate_strategy expanded_override_strategy_list[2 * 2 * 2 * 2]; + unsigned int num_expanded_override_strategies; struct dml2_pmo_pstate_strategy pstate_strategy_candidates[DML2_PMO_PSTATE_CANDIDATE_LIST_SIZE]; int num_pstate_candidates; int cur_pstate_candidate; @@ -706,7 +687,6 @@ struct dml2_pmo_instance { int mpc_combine_limit; int odm_combine_limit; int mcg_clock_table_size; - union { struct { struct { @@ -963,7 +943,13 @@ struct dml2_top_mcache_validate_admissability_locals { struct dml2_top_display_cfg_support_info { const struct dml2_display_cfg *display_config; struct core_display_cfg_support_info core_info; - enum dml2_pstate_support_method per_plane_pstate_method[DML2_MAX_PLANES]; +}; + +struct dml2_top_funcs { + bool (*check_mode_supported)(struct dml2_check_mode_supported_in_out *in_out); + bool (*build_mode_programming)(struct dml2_build_mode_programming_in_out *in_out); + bool (*build_mcache_programming)(struct dml2_build_mcache_programming_in_out *in_out); + bool (*unit_test)(void); }; struct dml2_instance { @@ -978,8 +964,8 @@ struct dml2_instance { struct dml2_ip_capabilities ip_caps; struct dml2_mcg_min_clock_table min_clk_table; - struct dml2_pmo_options pmo_options; + 
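The PMO_*_STRATEGY_MASK macros introduced earlier in this hunk build contiguous bitmasks over the pstate-method enum, so testing (1 << method) against a mask classifies a per-stream method as DRR, FW-assisted, and so on. A minimal sketch of the same ((1 << (hi - lo + 1)) - 1) << lo construction, using the numeric values from the legacy dml2_pmo_pstate_method enum removed in this hunk (the renamed dml2_pstate_method values are assumed to be laid out analogously):

#include <stdio.h>

/* Illustrative values copied from the removed dml2_pmo_pstate_method enum. */
enum pstate_method {
	method_na = 0,
	method_reserved_fw = 10,
	method_fw_vactive_drr = 11,
	method_fw_drr = 21,
	method_reserved_fw_drr_var = 22,
};

/* Contiguous bitmask covering bit positions lo..hi inclusive. */
#define RANGE_MASK(lo, hi) (((1u << ((hi) - (lo) + 1)) - 1) << (lo))

int main(void)
{
	unsigned int no_drr = RANGE_MASK(method_na, method_reserved_fw);
	unsigned int drr = RANGE_MASK(method_fw_vactive_drr, method_reserved_fw_drr_var);

	printf("NO_DRR mask = 0x%x\n", no_drr);	/* 0x7ff: bits 0..10 */
	printf("DRR mask    = 0x%x\n", drr);	/* 0x7ff800: bits 11..22 */

	/* Classify one method: fw_drr (bit 21) falls in the DRR range. */
	printf("fw_drr is DRR: %d\n", !!((1u << method_fw_drr) & drr));	/* 1 */
	return 0;
}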
struct dml2_top_funcs funcs; struct { struct dml2_initialize_instance_locals initialize_instance_locals; diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c index bde4250853b1..b416320873e1 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_translation_helper.c @@ -553,13 +553,53 @@ void dml2_init_soc_states(struct dml2_context *dml2, const struct dc *in_dc, } } - dml2_policy_build_synthetic_soc_states(s, p); - if (dml2->v20.dml_core_ctx.project == dml_project_dcn35) { - // Override last out_state with data from last in_state - // This will ensure that out_state contains max fclk - memcpy(&p->out_states->state_array[p->out_states->num_states - 1], - &p->in_states->state_array[p->in_states->num_states - 1], - sizeof(struct soc_state_bounding_box_st)); + if (dml2->v20.dml_core_ctx.project == dml_project_dcn35 || + dml2->v20.dml_core_ctx.project == dml_project_dcn351) { + int max_dcfclk_mhz = 0, max_dispclk_mhz = 0, max_dppclk_mhz = 0, max_phyclk_mhz = 0, + max_dtbclk_mhz = 0, max_fclk_mhz = 0, max_uclk_mhz = 0, max_socclk_mhz = 0; + + for (i = 0; i < p->in_states->num_states; i++) { + if (p->in_states->state_array[i].dcfclk_mhz > max_dcfclk_mhz) + max_dcfclk_mhz = (int)p->in_states->state_array[i].dcfclk_mhz; + if (p->in_states->state_array[i].fabricclk_mhz > max_fclk_mhz) + max_fclk_mhz = (int)p->in_states->state_array[i].fabricclk_mhz; + if (p->in_states->state_array[i].socclk_mhz > max_socclk_mhz) + max_socclk_mhz = (int)p->in_states->state_array[i].socclk_mhz; + if (p->in_states->state_array[i].dram_speed_mts > max_uclk_mhz) + max_uclk_mhz = (int)p->in_states->state_array[i].dram_speed_mts; + if (p->in_states->state_array[i].dispclk_mhz > max_dispclk_mhz) + max_dispclk_mhz = (int)p->in_states->state_array[i].dispclk_mhz; + if (p->in_states->state_array[i].dppclk_mhz > max_dppclk_mhz) + max_dppclk_mhz = (int)p->in_states->state_array[i].dppclk_mhz; + if (p->in_states->state_array[i].phyclk_mhz > max_phyclk_mhz) + max_phyclk_mhz = (int)p->in_states->state_array[i].phyclk_mhz; + if (p->in_states->state_array[i].dtbclk_mhz > max_dtbclk_mhz) + max_dtbclk_mhz = (int)p->in_states->state_array[i].dtbclk_mhz; + } + + for (i = 0; i < p->in_states->num_states; i++) { + /* Independent states - including base (unlisted) parameters from state 0. */ + p->out_states->state_array[i] = p->in_states->state_array[0]; + + p->out_states->state_array[i].dispclk_mhz = max_dispclk_mhz; + p->out_states->state_array[i].dppclk_mhz = max_dppclk_mhz; + p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; + p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + + p->out_states->state_array[i].dscclk_mhz = max_dispclk_mhz / 3.0; + p->out_states->state_array[i].phyclk_mhz = max_phyclk_mhz; + p->out_states->state_array[i].dtbclk_mhz = max_dtbclk_mhz; + + /* Dependent states. 
*/ + p->out_states->state_array[i].dram_speed_mts = p->in_states->state_array[i].dram_speed_mts; + p->out_states->state_array[i].fabricclk_mhz = p->in_states->state_array[i].fabricclk_mhz; + p->out_states->state_array[i].socclk_mhz = p->in_states->state_array[i].socclk_mhz; + p->out_states->state_array[i].dcfclk_mhz = p->in_states->state_array[i].dcfclk_mhz; + } + + p->out_states->num_states = p->in_states->num_states; + } else { + dml2_policy_build_synthetic_soc_states(s, p); } } diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c index 9190c1328d5b..340791d40ecb 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml2_wrapper.c @@ -531,14 +531,21 @@ static bool optimize_pstate_with_svp_and_drr(struct dml2_context *dml2, struct d static bool call_dml_mode_support_and_programming(struct dc_state *context) { unsigned int result = 0; - unsigned int min_state; + unsigned int min_state = 0; int min_state_for_g6_temp_read = 0; + + + if (!context) + return false; + struct dml2_context *dml2 = context->bw_ctx.dml2; struct dml2_wrapper_scratch *s = &dml2->v20.scratch; - min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + min_state_for_g6_temp_read = calculate_lowest_supported_state_for_temp_read(dml2, context); - ASSERT(min_state_for_g6_temp_read >= 0); + ASSERT(min_state_for_g6_temp_read >= 0); + } if (!dml2->config.use_native_pstate_optimization) { result = optimize_pstate_with_svp_and_drr(dml2, context); @@ -549,14 +556,20 @@ static bool call_dml_mode_support_and_programming(struct dc_state *context) /* Upon trying to set certain frequencies in FRL, min_state_for_g6_temp_read is reported as -1. This leads to an invalid value of min_state causing crashes later on. * Use the default logic for min_state only when min_state_for_g6_temp_read is a valid value. In other cases, use the value calculated by the DML directly. */ - if (min_state_for_g6_temp_read >= 0) - min_state = min_state_for_g6_temp_read > s->mode_support_params.out_lowest_state_idx ?
min_state_for_g6_temp_read : s->mode_support_params.out_lowest_state_idx; + else + min_state = s->mode_support_params.out_lowest_state_idx; + } + if (result) { + if (!context->streams[0]->sink->link->dc->caps.is_apu) { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, min_state, &s->cur_display_config, true); + } else { + result = dml_mode_programming(&dml2->v20.dml_core_ctx, s->mode_support_params.out_lowest_state_idx, &s->cur_display_config, true); + } + } return result; } @@ -685,6 +698,8 @@ static bool dml2_validate_only(struct dc_state *context) build_unoptimized_policy_settings(dml2->v20.dml_core_ctx.project, &dml2->v20.dml_core_ctx.policy); map_dc_state_into_dml_display_cfg(dml2, context, &dml2->v20.scratch.cur_display_config); + if (!dml2->config.skip_hw_state_mapping) + dml2_apply_det_buffer_allocation_policy(dml2, &dml2->v20.scratch.cur_display_config); result = pack_and_call_dml_mode_support_ex(dml2, &dml2->v20.scratch.cur_display_config, diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c index 377ef6d01ae5..00d22e542469 100644 --- a/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml2/dml_display_rq_dlg_calc.c @@ -427,18 +427,6 @@ void dml_rq_dlg_get_dlg_reg(dml_display_dlg_regs_st *disp_dlg_regs, dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_vm_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_vm_flip); dml_print("DML_DLG: %s: disp_dlg_regs->dst_y_per_row_flip = 0x%x\n", __func__, disp_dlg_regs->dst_y_per_row_flip); - // hack for FPGA - /* NOTE: We dont have getenv defined in driver and it does not make any sense in the driver */ - /*char* fpga_env = getenv("FPGA_FPDIV"); - if(fpga_env !=NULL) - { - if(disp_dlg_regs->vratio_prefetch >= (dml_uint_t)dml_pow(2, 22)) - { - disp_dlg_regs->vratio_prefetch = (dml_uint_t)dml_pow(2, 22)-1; - dml_print("FPGA msg: vratio_prefetch exceed the max value, the register field is [21:0]\n"); - } - }*/ - disp_dlg_regs->refcyc_per_vm_group_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_group_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); disp_dlg_regs->refcyc_per_vm_group_flip = (dml_uint_t)(dml_get_refcyc_per_vm_group_flip_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz); disp_dlg_regs->refcyc_per_vm_req_vblank = (dml_uint_t)(dml_get_refcyc_per_vm_req_vblank_in_us(mode_lib, pipe_idx) * refclk_freq_in_mhz * dml_pow(2, 10)); diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c index fae98cf52020..bc058f682438 100644 --- a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.c @@ -270,16 +270,3 @@ void dcn30_dwbc_construct(struct dcn30_dwbc *dwbc30, dwbc30->dwbc_shift = dwbc_shift; dwbc30->dwbc_mask = dwbc_mask; } - -void dwb3_set_host_read_rate_control(struct dwbc *dwbc, bool host_read_delay) -{ - struct dcn30_dwbc *dwbc30 = TO_DCN30_DWBC(dwbc); - - /* - * Set maximum delay of host read access to DWBSCL LUT or OGAM LUT if there are no - * idle cycles in HW pipeline (in number of clock cycles times 4) - */ - REG_UPDATE(DWB_HOST_READ_CONTROL, DWB_HOST_READ_RATE_CONTROL, host_read_delay); - - DC_LOG_DWB("%s dwb3_rate_control at inst = %d", __func__, dwbc->inst); -} diff --git a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h index 0f3f7c5fbaec..7f053f49ec6a 100644 --- 
a/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h +++ b/drivers/gpu/drm/amd/display/dc/dwb/dcn30/dcn30_dwb.h @@ -914,7 +914,6 @@ bool dwb3_ogam_set_input_transfer_func( struct dwbc *dwbc, const struct dc_transfer_func *in_transfer_func_dwb_ogam); -void dwb3_set_host_read_rate_control(struct dwbc *dwbc, bool host_read_delay); #endif diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c index 22ac2b7e49ae..f0ba944553df 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.c @@ -518,6 +518,20 @@ bool hubp1_program_surface_flip_and_addr( return true; } +void hubp1_clear_tiling(struct hubp *hubp) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0); + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); + + REG_UPDATE_4(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, 0, + PRIMARY_SURFACE_DCC_IND_64B_BLK, 0, + SECONDARY_SURFACE_DCC_EN, 0, + SECONDARY_SURFACE_DCC_IND_64B_BLK, 0); +} + void hubp1_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size independent_64b_blks) { @@ -1363,6 +1377,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .hubp_disable_control = hubp1_disable_control, .hubp_get_underflow_status = hubp1_get_underflow_status, .hubp_init = hubp1_init, + .hubp_clear_tiling = hubp1_clear_tiling, .dmdata_set_attributes = NULL, .dmdata_load = NULL, diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h index 69119b2fdce2..631350cd4f2e 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn10/dcn10_hubp.h @@ -794,4 +794,6 @@ void hubp1_soft_reset(struct hubp *hubp, bool reset); void hubp1_set_flip_int(struct hubp *hubp); +void hubp1_clear_tiling(struct hubp *hubp); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c index 0637e4c552d8..200194544bf0 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.c @@ -406,6 +406,20 @@ void hubp2_program_rotation( H_MIRROR_EN, mirror); } +void hubp2_clear_tiling(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0); + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); + + REG_UPDATE_4(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, 0, + PRIMARY_SURFACE_DCC_IND_64B_BLK, 0, + SECONDARY_SURFACE_DCC_EN, 0, + SECONDARY_SURFACE_DCC_IND_64B_BLK, 0); +} + void hubp2_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size independent_64b_blks) { @@ -1676,6 +1690,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .hubp_in_blank = hubp1_in_blank, .hubp_soft_reset = hubp1_soft_reset, .hubp_set_flip_int = hubp1_set_flip_int, + .hubp_clear_tiling = hubp2_clear_tiling, }; diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h index 18e194507e36..7fd9240868c3 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn20/dcn20_hubp.h @@ -409,6 +409,8 @@ void hubp2_read_state_common(struct hubp *hubp); void hubp2_read_state(struct hubp *hubp); +void hubp2_clear_tiling(struct hubp *hubp); + #endif /* __DC_MEM_INPUT_DCN20_H__ */ diff --git 
a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c index cd2bfcc51276..d910e4a54c34 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn201/dcn201_hubp.c @@ -131,6 +131,7 @@ static struct hubp_funcs dcn201_hubp_funcs = { .hubp_clear_underflow = hubp1_clear_underflow, .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp1_init, + .hubp_clear_tiling = hubp1_clear_tiling, }; bool dcn201_hubp_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c index e13d69a22c1c..edbdb8c88d5c 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn21/dcn21_hubp.c @@ -837,6 +837,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_init = hubp21_init, .validate_dml_output = hubp21_validate_dml_output, .hubp_set_flip_int = hubp1_set_flip_int, + .hubp_clear_tiling = hubp1_clear_tiling, }; bool hubp21_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c index 60a64d290352..3b16c3cda2c3 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.c @@ -334,6 +334,22 @@ void hubp3_program_tiling( } +void hubp3_clear_tiling(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0); + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); + + REG_UPDATE_6(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, 0, + PRIMARY_SURFACE_DCC_IND_BLK, 0, + PRIMARY_SURFACE_DCC_IND_BLK_C, 0, + SECONDARY_SURFACE_DCC_EN, 0, + SECONDARY_SURFACE_DCC_IND_BLK, 0, + SECONDARY_SURFACE_DCC_IND_BLK_C, 0); +} + void hubp3_dcc_control(struct hubp *hubp, bool enable, enum hubp_ind_block_size blk_size) { @@ -512,6 +528,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_in_blank = hubp1_in_blank, .hubp_soft_reset = hubp1_soft_reset, .hubp_set_flip_int = hubp1_set_flip_int, + .hubp_clear_tiling = hubp3_clear_tiling, }; bool hubp3_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h index b010531a7fe8..cfb01bf340a1 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn30/dcn30_hubp.h @@ -297,6 +297,8 @@ void hubp3_read_state(struct hubp *hubp); void hubp3_init(struct hubp *hubp); +void hubp3_clear_tiling(struct hubp *hubp); + #endif /* __DC_HUBP_DCN30_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c index 8394e8c06919..46b804ed05fb 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn31/dcn31_hubp.c @@ -96,6 +96,7 @@ static struct hubp_funcs dcn31_hubp_funcs = { .hubp_set_flip_int = hubp1_set_flip_int, .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank, + .hubp_clear_tiling = hubp3_clear_tiling, }; bool hubp31_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c index ca5b4b28a664..8b5bd73b8094 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn32/dcn32_hubp.c @@ 
-201,7 +201,8 @@ static struct hubp_funcs dcn32_hubp_funcs = { .hubp_update_force_cursor_pstate_disallow = hubp32_update_force_cursor_pstate_disallow, .phantom_hubp_post_enable = hubp32_phantom_hubp_post_enable, .hubp_update_mall_sel = hubp32_update_mall_sel, - .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering + .hubp_prepare_subvp_buffering = hubp32_prepare_subvp_buffering, + .hubp_clear_tiling = hubp3_clear_tiling, }; bool hubp32_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c index d1f05b82b3dd..eb62042dfafc 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn35/dcn35_hubp.c @@ -216,6 +216,7 @@ static struct hubp_funcs dcn35_hubp_funcs = { .hubp_set_flip_int = hubp1_set_flip_int, .hubp_in_blank = hubp1_in_blank, .program_extended_blank = hubp31_program_extended_blank_value, + .hubp_clear_tiling = hubp3_clear_tiling, }; bool hubp35_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c index b1ebf5053b4f..09f730cfbf8e 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.c @@ -40,7 +40,7 @@ #define FN(reg_name, field_name) \ hubp2->hubp_shift->field_name, hubp2->hubp_mask->field_name -static void hubp401_program_3dlut_fl_addr(struct hubp *hubp, +void hubp401_program_3dlut_fl_addr(struct hubp *hubp, const struct dc_plane_address address) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -49,14 +49,14 @@ static void hubp401_program_3dlut_fl_addr(struct hubp *hubp, REG_WRITE(HUBP_3DLUT_ADDRESS_LOW, address.lut3d.addr.low_part); } -static void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group) +void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(HUBP_3DLUT_DLG_PARAM, REFCYC_PER_3DLUT_GROUP, refcyc_per_3dlut_group); } -static void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable) +void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -72,28 +72,28 @@ int hubp401_get_3dlut_fl_done(struct hubp *hubp) return ret; } -static void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode) +void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_ADDRESSING_MODE, addr_mode); } -static void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width) +void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_WIDTH, width); } -static void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled) +void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(HUBP_3DLUT_CONTROL, HUBP_3DLUT_TMZ, protection_enabled ? 
1 : 0); } -static void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, +void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r) @@ -106,21 +106,21 @@ static void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, HUBP_3DLUT_CROSSBAR_SELECT_CR_R, bit_slice_cr_r); } -static void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale) +void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE_2(_3DLUT_FL_BIAS_SCALE, HUBP0_3DLUT_FL_BIAS, bias, HUBP0_3DLUT_FL_SCALE, scale); } -static void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode) +void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); REG_UPDATE(_3DLUT_FL_CONFIG, HUBP0_3DLUT_FL_MODE, mode); } -static void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format) +void hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format) { struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); @@ -508,6 +508,18 @@ bool hubp401_program_surface_flip_and_addr( return true; } +void hubp401_clear_tiling(struct hubp *hubp) +{ + struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp); + + REG_UPDATE(DCHUBP_REQ_SIZE_CONFIG, SWATH_HEIGHT, 0); + REG_UPDATE(DCSURF_TILING_CONFIG, SW_MODE, DC_SW_LINEAR); + + REG_UPDATE_2(DCSURF_SURFACE_CONTROL, + PRIMARY_SURFACE_DCC_EN, 0, + SECONDARY_SURFACE_DCC_EN, 0); +} + void hubp401_dcc_control(struct hubp *hubp, struct dc_plane_dcc_param *dcc) { @@ -1004,7 +1016,8 @@ static struct hubp_funcs dcn401_hubp_funcs = { .hubp_program_3dlut_fl_width = hubp401_program_3dlut_fl_width, .hubp_program_3dlut_fl_tmz_protected = hubp401_program_3dlut_fl_tmz_protected, .hubp_program_3dlut_fl_crossbar = hubp401_program_3dlut_fl_crossbar, - .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done + .hubp_get_3dlut_fl_done = hubp401_get_3dlut_fl_done, + .hubp_clear_tiling = hubp2_clear_tiling, }; bool hubp401_construct( diff --git a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h index e52fdb5b0cd0..9b200a55bf9d 100644 --- a/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/hubp/dcn401/dcn401_hubp.h @@ -340,4 +340,29 @@ int hubp401_get_3dlut_fl_done(struct hubp *hubp); void hubp401_set_unbounded_requesting(struct hubp *hubp, bool enable); +void hubp401_update_3dlut_fl_bias_scale(struct hubp *hubp, uint16_t bias, uint16_t scale); + +void hubp401_program_3dlut_fl_crossbar(struct hubp *hubp, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_y_g, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, + enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r); + +void hubp401_program_3dlut_fl_tmz_protected(struct hubp *hubp, bool protection_enabled); + +void hubp401_program_3dlut_fl_width(struct hubp *hubp, enum hubp_3dlut_fl_width width); + +void hubp401_program_3dlut_fl_addressing_mode(struct hubp *hubp, enum hubp_3dlut_fl_addressing_mode addr_mode); + +void hubp401_enable_3dlut_fl(struct hubp *hubp, bool enable); + +void hubp401_program_3dlut_fl_dlg_param(struct hubp *hubp, int refcyc_per_3dlut_group); + +void hubp401_program_3dlut_fl_addr(struct hubp *hubp, const struct dc_plane_address address); + +void 
hubp401_program_3dlut_fl_format(struct hubp *hubp, enum hubp_3dlut_fl_format format); + +void hubp401_program_3dlut_fl_mode(struct hubp *hubp, enum hubp_3dlut_fl_mode mode); + +void hubp401_clear_tiling(struct hubp *hubp); + #endif /* __DC_HUBP_DCN401_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c index 0e8d32e3dbae..c32764aef884 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn30/dcn30_init.c @@ -86,7 +86,6 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c index 780ce4c064aa..dcb27cdbce73 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn301/dcn301_init.c @@ -86,7 +86,6 @@ static const struct hw_sequencer_funcs dcn301_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c index 5f8f45b48720..fb2ffb637931 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn31/dcn31_init.c @@ -89,7 +89,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, @@ -98,7 +97,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn31_set_backlight_level, + .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .enable_lvds_link_output = dce110_enable_lvds_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c index 9b88eb72086d..be26c925fdfa 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_hwseq.c @@ -162,6 +162,8 @@ void dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx int opp_inst[MAX_PIPES] = {0}; int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false); int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true); + struct mpc *mpc = dc->res_pool->mpc; + int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -174,6 +176,16 @@ void 
dcn314_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); + if (mpc->funcs->set_out_rate_control) { + for (i = 0; i < opp_cnt; ++i) { + mpc->funcs->set_out_rate_control( + mpc, opp_inst[i], + false, + 0, + NULL); + } + } + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c index 6bdfbf22ce87..21ef03a76229 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn314/dcn314_init.c @@ -91,7 +91,6 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, @@ -100,7 +99,7 @@ static const struct hw_sequencer_funcs dcn314_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .set_backlight_level = dcn31_set_backlight_level, + .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, .enable_lvds_link_output = dce110_enable_lvds_link_output, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c index fa11f075d1f9..2e6ecf939fbd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_hwseq.c @@ -985,6 +985,7 @@ void dcn32_init_hw(struct dc *dc) dc->caps.dmub_caps.subvp_psr = dc->ctx->dmub_srv->dmub->feature_caps.subvp_psr_support; dc->caps.dmub_caps.gecc_enable = dc->ctx->dmub_srv->dmub->feature_caps.gecc_enable; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; + dc->caps.dmub_caps.aux_backlight_support = dc->ctx->dmub_srv->dmub->feature_caps.abm_aux_backlight_support; /* for DCN401 testing only */ dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c index 5ecee7e320da..e4d149eff10f 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn32/dcn32_init.c @@ -87,7 +87,6 @@ static const struct hw_sequencer_funcs dcn32_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e599cdc465bf..d5f76cc69c73 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c 
@@ -426,6 +426,8 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * int opp_inst[MAX_PIPES] = {0}; int odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, false); int last_odm_slice_width = resource_get_odm_slice_dst_width(pipe_ctx, true); + struct mpc *mpc = dc->res_pool->mpc; + int i; opp_cnt = get_odm_config(pipe_ctx, opp_inst); @@ -438,6 +440,16 @@ void dcn35_update_odm(struct dc *dc, struct dc_state *context, struct pipe_ctx * pipe_ctx->stream_res.tg->funcs->set_odm_bypass( pipe_ctx->stream_res.tg, &pipe_ctx->stream->timing); + if (mpc->funcs->set_out_rate_control) { + for (i = 0; i < opp_cnt; ++i) { + mpc->funcs->set_out_rate_control( + mpc, opp_inst[i], + false, + 0, + NULL); + } + } + for (odm_pipe = pipe_ctx->next_odm_pipe; odm_pipe; odm_pipe = odm_pipe->next_odm_pipe) { odm_pipe->stream_res.opp->funcs->opp_pipe_clock_control( odm_pipe->stream_res.opp, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c index fd67779c27a9..5ca8db2b2d03 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_init.c @@ -92,7 +92,6 @@ static const struct hw_sequencer_funcs dcn35_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c index 3c275a1eff58..4f73e7f551ac 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn351/dcn351_init.c @@ -91,7 +91,6 @@ static const struct hw_sequencer_funcs dcn351_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c index 307782592789..8cb0fbd301d8 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c @@ -435,7 +435,8 @@ void dcn401_init_hw(struct dc *dc) dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr; dc->caps.dmub_caps.mclk_sw = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver > 0; dc->caps.dmub_caps.fams_ver = dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver; - dc->debug.fams2_config.bits.enable &= dc->ctx->dmub_srv->dmub->feature_caps.fw_assisted_mclk_switch_ver == 2; + dc->debug.fams2_config.bits.enable &= + dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver; // sw & fw fams versions must match for support if ((!dc->debug.fams2_config.bits.enable && dc->res_pool->funcs->update_bw_bounding_box) || res_pool->ref_clocks.dchub_ref_clock_inKhz / 1000 != current_dchub_ref_freq) { /* update bounding box if FAMS2 disabled, or if dchub clk has changed */ @@ -821,7 +822,7 @@ enum dc_status dcn401_enable_stream_timing( int opp_inst[MAX_PIPES] = {0}; struct pipe_ctx 
*opp_heads[MAX_PIPES] = {0}; struct dc_crtc_timing patched_crtc_timing = stream->timing; - bool manual_mode; + bool manual_mode = false; unsigned int tmds_div = PIXEL_RATE_DIV_NA; unsigned int unused_div = PIXEL_RATE_DIV_NA; int odm_slice_width; diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c index 23e4f208152e..b30f665d98a6 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_init.c @@ -66,7 +66,6 @@ static const struct hw_sequencer_funcs dcn401_funcs = { .enable_writeback = dcn30_enable_writeback, .disable_writeback = dcn30_disable_writeback, .update_writeback = dcn30_update_writeback, - .mmhubbub_warmup = dcn30_mmhubbub_warmup, .dmdata_status_done = dcn20_dmdata_status_done, .program_dmdata_engine = dcn30_program_dmdata_engine, .set_dmdata_attributes = dcn20_set_dmdata_attributes, diff --git a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h index 66fdc5805d0a..a5bb10d7b160 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/hwss/hw_sequencer.h @@ -331,10 +331,6 @@ struct hw_sequencer_funcs { void (*disable_writeback)(struct dc *dc, unsigned int dwb_pipe_inst); - bool (*mmhubbub_warmup)(struct dc *dc, - unsigned int num_dwb, - struct dc_writeback_info *wb_info); - /* Clock Related */ enum dc_status (*set_clock)(struct dc *dc, enum dc_clock_type clock_type, diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 2edd5b38ce4f..06a420154888 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -45,9 +45,6 @@ #define MAX_SVP_PHANTOM_STREAMS 2 #define MAX_SVP_PHANTOM_PLANES 2 -void enable_surface_flip_reporting(struct dc_plane_state *plane_state, - uint32_t controller_id); - #include "grph_object_id.h" #include "link_encoder.h" #include "stream_encoder.h" @@ -542,7 +539,8 @@ struct dcn_bw_output { bool legacy_svp_drr_stream_index_valid; struct dml2_mcache_surface_allocation mcache_allocations[DML2_MAX_PLANES]; struct dmub_cmd_fams2_global_config fams2_global_config; - struct dmub_fams2_stream_static_state fams2_stream_params[DML2_MAX_PLANES]; + union dmub_cmd_fams2_config fams2_stream_base_params[DML2_MAX_PLANES]; + union dmub_cmd_fams2_config fams2_stream_sub_params[DML2_MAX_PLANES]; struct dml2_display_arb_regs arb_regs; }; diff --git a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h index 55529c5f471c..d19a595c2be4 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h +++ b/drivers/gpu/drm/amd/display/dc/inc/dcn_calcs.h @@ -624,10 +624,6 @@ bool dcn_validate_bandwidth( struct dc_state *context, bool fast_validate); -unsigned int dcn_find_dcfclk_suits_all( - const struct dc *dc, - struct dc_clocks *clocks); - void dcn_get_soc_clks( struct dc *dc, int *min_fclk_khz, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h index c2dd061892f4..7a1ca1e98059 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h @@ -166,6 +166,41 @@ enum dentist_divider_range { CLK_SR_DCN32(CLK1_CLK4_CURRENT_CNT), \ CLK_SR_DCN32(CLK4_CLK0_CURRENT_CNT) +#define CLK_REG_LIST_DCN35() \ + CLK_SR_DCN35(CLK1_CLK_PLL_REQ), \ + 
CLK_SR_DCN35(CLK1_CLK0_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK5_DFS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK1_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK2_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK3_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK4_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK5_CURRENT_CNT), \ + CLK_SR_DCN35(CLK1_CLK0_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_BYPASS_CNTL),\ + CLK_SR_DCN35(CLK1_CLK5_BYPASS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK1_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK2_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK3_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK4_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK5_DS_CNTL), \ + CLK_SR_DCN35(CLK1_CLK0_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK1_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK2_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK3_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \ + CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \ + CLK_SR_DCN35(CLK5_spll_field_8), \ + SR(DENTIST_DISPCLK_CNTL), \ + #define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \ CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh),\ CLK_SF(CLK1_CLK_PLL_REQ, FbMult_int, mask_sh),\ @@ -236,6 +271,7 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_DFS_CNTL; uint32_t CLK1_CLK3_DFS_CNTL; uint32_t CLK1_CLK4_DFS_CNTL; + uint32_t CLK1_CLK5_DFS_CNTL; uint32_t CLK2_CLK2_DFS_CNTL; uint32_t CLK1_CLK0_CURRENT_CNT; @@ -243,11 +279,34 @@ struct clk_mgr_registers { uint32_t CLK1_CLK2_CURRENT_CNT; uint32_t CLK1_CLK3_CURRENT_CNT; uint32_t CLK1_CLK4_CURRENT_CNT; + uint32_t CLK1_CLK5_CURRENT_CNT; uint32_t CLK0_CLK0_DFS_CNTL; uint32_t CLK0_CLK1_DFS_CNTL; uint32_t CLK0_CLK3_DFS_CNTL; uint32_t CLK0_CLK4_DFS_CNTL; + uint32_t CLK1_CLK0_BYPASS_CNTL; + uint32_t CLK1_CLK1_BYPASS_CNTL; + uint32_t CLK1_CLK2_BYPASS_CNTL; + uint32_t CLK1_CLK3_BYPASS_CNTL; + uint32_t CLK1_CLK4_BYPASS_CNTL; + uint32_t CLK1_CLK5_BYPASS_CNTL; + + uint32_t CLK1_CLK0_DS_CNTL; + uint32_t CLK1_CLK1_DS_CNTL; + uint32_t CLK1_CLK2_DS_CNTL; + uint32_t CLK1_CLK3_DS_CNTL; + uint32_t CLK1_CLK4_DS_CNTL; + uint32_t CLK1_CLK5_DS_CNTL; + + uint32_t CLK1_CLK0_ALLOW_DS; + uint32_t CLK1_CLK1_ALLOW_DS; + uint32_t CLK1_CLK2_ALLOW_DS; + uint32_t CLK1_CLK3_ALLOW_DS; + uint32_t CLK1_CLK4_ALLOW_DS; + uint32_t CLK1_CLK5_ALLOW_DS; + uint32_t CLK5_spll_field_8; + }; struct clk_mgr_shift { diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 16580d624278..d0878fc0cc94 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -275,6 +275,7 @@ struct hubp_funcs { enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cb_b, enum hubp_3dlut_fl_crossbar_bit_slice bit_slice_cr_r); int (*hubp_get_3dlut_fl_done)(struct hubp *hubp); + void (*hubp_clear_tiling)(struct hubp *hubp); }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h index a8b44f398ce6..4f5d102455ca 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mem_input.h @@ -187,6 +187,8 @@ struct mem_input_funcs { const struct dc_cursor_position *pos, const struct dc_cursor_mi_param *param); + void (*mem_input_clear_tiling)( + struct mem_input *mem_input); }; #endif diff --git 
a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c index 4fb9cd6708d5..1d61d475d36f 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c @@ -30,8 +30,8 @@ #include "../dce110/irq_service_dce110.h" #include "irq_service_dcn201.h" -#include "dcn/dcn_2_0_3_offset.h" -#include "dcn/dcn_2_0_3_sh_mask.h" +#include "dcn/dcn_2_0_1_offset.h" +#include "dcn/dcn_2_0_1_sh_mask.h" #include "cyan_skillfish_ip_offset.h" #include "soc15_hw_ip.h" diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c index 5d66bfc7fe6e..60e64e0138a3 100644 --- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c +++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c @@ -1953,6 +1953,10 @@ static void enable_link_hdmi(struct pipe_ctx *pipe_ctx) stream->phy_pix_clk = stream->timing.pix_clk_100hz / 10; if (stream->phy_pix_clk > 340000) is_over_340mhz = true; + if (dc_is_tmds_signal(stream->signal) && stream->phy_pix_clk > 6000000UL) { + ASSERT(false); + return; + } if (dc_is_hdmi_signal(pipe_ctx->stream->signal)) { unsigned short masked_chip_caps = pipe_ctx->stream->link->chip_caps & diff --git a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c index 9dabaf682171..ce9d799d2386 100644 --- a/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c +++ b/drivers/gpu/drm/amd/display/dc/link/protocols/link_dp_capability.c @@ -1632,13 +1632,6 @@ static bool retrieve_link_cap(struct dc_link *link) sizeof(link->dpcd_caps.lttpr_caps.phy_repeater_cnt)); } - /* Read DP tunneling information. */ - if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { - status = dpcd_get_tunneling_device_data(link); - if (status != DC_OK) - dm_error("%s: Read tunneling device data failed.\n", __func__); - } - dpcd_set_source_specific_data(link); /* Sink may need to configure internals based on vendor, so allow some * time before proceeding with possibly vendor specific transactions @@ -1711,7 +1704,7 @@ static bool retrieve_link_cap(struct dc_link *link) link->dpcd_caps.dprx_feature.raw = dpcd_dprx_data; if (status != DC_OK) - dm_error("%s: Read DPRX caps data failed.\n", __func__); + dm_error("%s: Read DPRX feature list failed.\n", __func__); /* AdaptiveSyncCapability */ dpcd_dprx_data = 0; @@ -1726,15 +1719,13 @@ static bool retrieve_link_cap(struct dc_link *link) link->dpcd_caps.adaptive_sync_caps.dp_adap_sync_caps.raw = dpcd_dprx_data; if (status != DC_OK) - dm_error("%s: Read DPRX caps data failed. Addr:%#x\n", + dm_error("%s: Read DPRX feature list_1 failed. Addr:%#x\n", __func__, DP_DPRX_FEATURE_ENUMERATION_LIST_CONT_1); } - else { link->dpcd_caps.dprx_feature.raw = 0; } - /* Error condition checking... * It is impossible for Sink to report Max Lane Count = 0. 
* It is possible for Sink to report Max Link Rate = 0, if it is @@ -1918,6 +1909,7 @@ static bool retrieve_link_cap(struct dc_link *link) if (link->dpcd_caps.channel_coding_cap.bits.DP_128b_132b_SUPPORTED) { DC_LOG_DP2("128b/132b encoding is supported at link %d", link->link_index); + /* Read 128b/132b supported link rates */ core_link_read_dpcd(link, DP_128B132B_SUPPORTED_LINK_RATES, &link->dpcd_caps.dp_128b_132b_supported_link_rates.raw, @@ -1965,6 +1957,13 @@ static bool retrieve_link_cap(struct dc_link *link) link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw, sizeof(link->dpcd_caps.max_uncompressed_pixel_rate_cap.raw)); + /* Read DP tunneling information. */ + if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) { + status = dpcd_get_tunneling_device_data(link); + if (status != DC_OK) + dm_error("%s: Read DP tunneling device data failed.\n", __func__); + } + retrieve_cable_id(link); dpcd_write_cable_id_to_dprx(link); diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c index fe26fde12eeb..85298b8a1b5e 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.c @@ -110,6 +110,23 @@ void mpc3_disable_dwb_mux( MPC_DWB0_MUX, 0xf); } +void mpc3_set_out_rate_control( + struct mpc *mpc, + int opp_id, + bool enable, + bool rate_2x_mode, + struct mpc_dwb_flow_control *flow_control) +{ + struct dcn30_mpc *mpc30 = TO_DCN30_MPC(mpc); + + /* Always disable mpc out rate and flow control. + * MPC flow rate control is not needed for DCN30 and above. + */ + REG_UPDATE_2(MUX[opp_id], + MPC_OUT_RATE_CONTROL_DISABLE, 1, + MPC_OUT_RATE_CONTROL, 0); +} + enum dc_lut_mode mpc3_get_ogam_current(struct mpc *mpc, int mpcc_id) { /*Contrary to DCN2 and DCN1 wherein a single status register field holds this info; @@ -1519,6 +1536,7 @@ static const struct mpc_funcs dcn30_mpc_funcs = { .set_dwb_mux = mpc3_set_dwb_mux, .disable_dwb_mux = mpc3_disable_dwb_mux, .is_dwb_idle = mpc3_is_dwb_idle, + .set_out_rate_control = mpc3_set_out_rate_control, .set_gamut_remap = mpc3_set_gamut_remap, .program_shaper = mpc3_program_shaper, .acquire_rmu = mpcc3_acquire_rmu, diff --git a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h index ce93003dae01..103f29900a2c 100644 --- a/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h +++ b/drivers/gpu/drm/amd/display/dc/mpc/dcn30/dcn30_mpc.h @@ -1085,6 +1085,13 @@ bool mpc3_is_dwb_idle( struct mpc *mpc, int dwb_id); +void mpc3_set_out_rate_control( + struct mpc *mpc, + int opp_id, + bool enable, + bool rate_2x_mode, + struct mpc_dwb_flow_control *flow_control); + void mpc3_power_on_ogam_lut( struct mpc *mpc, int mpcc_id, bool power_on); diff --git a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c index 783ca9acc762..338a0cad23a5 100644 --- a/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c +++ b/drivers/gpu/drm/amd/display/dc/optc/dcn401/dcn401_optc.c @@ -315,7 +315,7 @@ void optc401_set_drr( struct drr_params amended_params = { 0 }; bool program_manual_trigger = false; - if (dc->caps.dmub_caps.fams_ver >= 2 && dc->debug.fams2_config.bits.enable) { + if (dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver && dc->debug.fams2_config.bits.enable) { if (params != NULL && params->vertical_total_max > 0 && params->vertical_total_min > 0) { @@ -380,7 +380,7 @@ void optc401_set_vtotal_min_max(struct timing_generator *optc, int
vtotal_min, i { struct dc *dc = optc->ctx->dc; - if (dc->caps.dmub_caps.fams_ver >= 2 && dc->debug.fams2_config.bits.enable) { + if (dc->caps.dmub_caps.fams_ver == dc->debug.fams_version.ver && dc->debug.fams2_config.bits.enable) { /* FAMS2 */ dc_dmub_srv_fams2_drr_update(dc, optc->inst, vtotal_min, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c index 7a5b9aa5292c..5c616b1f7bf7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c @@ -1509,60 +1509,9 @@ bool dcn20_split_stream_for_odm( next_odm_pipe->prev_odm_pipe = prev_odm_pipe; if (prev_odm_pipe->plane_state) { - struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data; - struct output_pixel_processor *opp = next_odm_pipe->stream_res.opp; - int new_width; - - /* HACTIVE halved for odm combine */ - sd->h_active /= 2; - /* Calculate new vp and recout for left pipe */ - /* Need at least 16 pixels width per side */ - if (sd->recout.x + 16 >= sd->h_active) - return false; - new_width = sd->h_active - sd->recout.x; - sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->recout.width - new_width)); - sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->recout.width - new_width)); - sd->recout.width = new_width; - - /* Calculate new vp and recout for right pipe */ - sd = &next_odm_pipe->plane_res.scl_data; - /* HACTIVE halved for odm combine */ - sd->h_active /= 2; - /* Need at least 16 pixels width per side */ - if (new_width <= 16) - return false; - new_width = sd->recout.width + sd->recout.x - sd->h_active; - sd->viewport.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->recout.width - new_width)); - sd->viewport_c.width -= dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->recout.width - new_width)); - sd->recout.width = new_width; - sd->viewport.x += dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz, sd->h_active - sd->recout.x)); - sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int( - sd->ratios.horz_c, sd->h_active - sd->recout.x)); - sd->recout.x = 0; - - /* - * When odm is used in YcbCr422 or 420 colour space, a split screen - * will be seen with the previous calculations since the extra left - * edge pixel is accounted for in fmt but not in viewport. - * - * Below are calculations which fix the split by fixing the calculations - * if there is an extra left edge pixel. 
- */ - if (opp && opp->funcs->opp_get_left_edge_extra_pixel_count - && opp->funcs->opp_get_left_edge_extra_pixel_count( - opp, next_odm_pipe->stream->timing.pixel_encoding, - resource_is_pipe_type(next_odm_pipe, OTG_MASTER)) == 1) { - sd->h_active += 1; - sd->recout.width += 1; - sd->viewport.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); - sd->viewport_c.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); - sd->viewport_c.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); - sd->viewport.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1)); + if (!resource_build_scaling_params(prev_odm_pipe) || + !resource_build_scaling_params(next_odm_pipe)) { + return false; } } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c index d3d67d366523..9f37f0097feb 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn201/dcn201_resource.c @@ -59,8 +59,8 @@ #include "cyan_skillfish_ip_offset.h" -#include "dcn/dcn_2_0_3_offset.h" -#include "dcn/dcn_2_0_3_sh_mask.h" +#include "dcn/dcn_2_0_1_offset.h" +#include "dcn/dcn_2_0_1_sh_mask.h" #include "dpcs/dpcs_2_0_3_offset.h" #include "dpcs/dpcs_2_0_3_sh_mask.h" diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index 2a3dabfe3cea..09f5b8b40791 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -726,6 +726,10 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, .dcc_meta_propagation_delay_us = 10, + .fams_version = { + .minor = 1, + .major = 2, + }, //v2.1 .fams2_config = { .bits = { .enable = true, diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c index 73a65913cb12..1306ce0321e2 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl.c @@ -137,15 +137,32 @@ static struct spl_rect calculate_mpc_slice_in_timing_active( struct spl_in *spl_in, struct spl_rect *plane_clip_rec) { - int mpc_slice_count = spl_in->basic_in.mpc_combine_h; - int mpc_slice_idx = spl_in->basic_in.mpc_combine_v; + bool use_recout_width_aligned = + spl_in->basic_in.num_h_slices_recout_width_align.use_recout_width_aligned; + int mpc_slice_count = + spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_num_h_slices; + int recout_width_align = + spl_in->basic_in.num_h_slices_recout_width_align.num_slices_recout_width.mpc_recout_width_align; + int mpc_slice_idx = spl_in->basic_in.mpc_h_slice_index; int epimo = mpc_slice_count - plane_clip_rec->width % mpc_slice_count - 1; struct spl_rect mpc_rec; - mpc_rec.width = plane_clip_rec->width / mpc_slice_count; - mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; - mpc_rec.height = plane_clip_rec->height; - mpc_rec.y = plane_clip_rec->y; + if (use_recout_width_aligned) { + mpc_rec.width = recout_width_align; + if ((mpc_rec.width * (mpc_slice_idx + 1)) > plane_clip_rec->width) { + mpc_rec.width = plane_clip_rec->width % recout_width_align; + mpc_rec.x = plane_clip_rec->x + recout_width_align * mpc_slice_idx; + } else + mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; + mpc_rec.height = plane_clip_rec->height; + mpc_rec.y = 
plane_clip_rec->y; + + } else { + mpc_rec.width = plane_clip_rec->width / mpc_slice_count; + mpc_rec.x = plane_clip_rec->x + mpc_rec.width * mpc_slice_idx; + mpc_rec.height = plane_clip_rec->height; + mpc_rec.y = plane_clip_rec->y; + } SPL_ASSERT(mpc_slice_count == 1 || spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE || mpc_rec.width % 2 == 0); @@ -546,6 +563,24 @@ static bool spl_is_rgb8(enum spl_pixel_format format) return false; } +static bool spl_is_video_format(enum spl_pixel_format format) +{ + if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN + && format <= SPL_PIXEL_FORMAT_VIDEO_END) + return true; + else + return false; +} + +static bool spl_is_subsampled_format(enum spl_pixel_format format) +{ + if (format >= SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN + && format <= SPL_PIXEL_FORMAT_SUBSAMPLED_END) + return true; + else + return false; +} + /*Calculate inits and viewport */ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, struct spl_scratch *spl_scratch) @@ -590,7 +625,7 @@ static void spl_calculate_inits_and_viewports(struct spl_in *spl_in, spl_swap(flip_vert_scan_dir, flip_horz_scan_dir); } - if (spl_is_yuv420(spl_in->basic_in.format)) { + if (spl_is_subsampled_format(spl_in->basic_in.format)) { /* this gives the direction of the cositing (negative will move * left, right otherwise) */ @@ -678,7 +713,7 @@ static void spl_handle_3d_recout(struct spl_in *spl_in, struct spl_rect *recout) * since 3d is special and needs to calculate vp as if there is no recout offset * This may break with rotation, good thing we aren't mixing hw rotation and 3d */ - if (spl_in->basic_in.mpc_combine_v) { + if (spl_in->basic_in.mpc_h_slice_index) { SPL_ASSERT(spl_in->basic_in.rotation == SPL_ROTATION_ANGLE_0 || (spl_in->basic_out.view_format != SPL_VIEW_3D_TOP_AND_BOTTOM && spl_in->basic_out.view_format != SPL_VIEW_3D_SIDE_BY_SIDE)); @@ -698,24 +733,6 @@ static void spl_clamp_viewport(struct spl_rect *viewport) viewport->width = MIN_VIEWPORT_SIZE; } -static bool spl_dscl_is_420_format(enum spl_pixel_format format) -{ - if (format == SPL_PIXEL_FORMAT_420BPP8 || - format == SPL_PIXEL_FORMAT_420BPP10) - return true; - else - return false; -} - -static bool spl_dscl_is_video_format(enum spl_pixel_format format) -{ - if (format >= SPL_PIXEL_FORMAT_VIDEO_BEGIN - && format <= SPL_PIXEL_FORMAT_VIDEO_END) - return true; - else - return false; -} - static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, const struct spl_scaler_data *data, bool enable_isharp, bool enable_easf) @@ -732,8 +749,8 @@ static enum scl_mode spl_get_dscl_mode(const struct spl_in *spl_in, && !enable_isharp) return SCL_MODE_SCALING_444_BYPASS; - if (!spl_dscl_is_420_format(pixel_format)) { - if (spl_dscl_is_video_format(pixel_format)) + if (!spl_is_subsampled_format(pixel_format)) { + if (spl_is_video_format(pixel_format)) return SCL_MODE_SCALING_444_YCBCR_ENABLE; else return SCL_MODE_SCALING_444_RGB_ENABLE; @@ -756,7 +773,7 @@ static bool spl_choose_lls_policy(enum spl_pixel_format format, enum spl_transfer_func_predefined tf_predefined_type, enum linear_light_scaling *lls_pref) { - if (spl_is_yuv420(format)) { + if (spl_is_video_format(format)) { *lls_pref = LLS_PREF_NO; if ((tf_type == SPL_TF_TYPE_PREDEFINED) || (tf_type == SPL_TF_TYPE_DISTRIBUTED_POINTS)) @@ -815,7 +832,7 @@ static bool enable_easf(struct spl_in *spl_in, struct spl_scratch *spl_scratch) /* Check if video is in fullscreen mode */ static bool spl_is_video_fullscreen(struct spl_in *spl_in) { - if (spl_is_yuv420(spl_in->basic_in.format) && 
spl_in->is_fullscreen) + if (spl_is_video_format(spl_in->basic_in.format) && spl_in->is_fullscreen) return true; return false; } @@ -846,10 +863,10 @@ static bool spl_get_isharp_en(struct spl_in *spl_in, * Apply sharpness to RGB and YUV (NV12/P010) * surfaces based on policy setting */ - if (!spl_is_yuv420(spl_in->basic_in.format) && + if (!spl_is_video_format(spl_in->basic_in.format) && (spl_in->sharpen_policy == SHARPEN_YUV)) return enable_isharp; - else if ((spl_is_yuv420(spl_in->basic_in.format) && !fullscreen) && + else if ((spl_is_video_format(spl_in->basic_in.format) && !fullscreen) && (spl_in->sharpen_policy == SHARPEN_RGB_FULLSCREEN_YUV)) return enable_isharp; else if (!spl_in->is_fullscreen && @@ -882,8 +899,8 @@ static void spl_get_taps_non_adaptive_scaler( if (in_taps->v_taps == 0) { if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert) > 1) - spl_scratch->scl_data.taps.v_taps = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( - spl_scratch->scl_data.ratios.vert, 2)), 8); + spl_scratch->scl_data.taps.v_taps = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.vert), 8); else spl_scratch->scl_data.taps.v_taps = 4; } else @@ -891,8 +908,8 @@ static void spl_get_taps_non_adaptive_scaler( if (in_taps->v_taps_c == 0) { if (spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c) > 1) - spl_scratch->scl_data.taps.v_taps_c = spl_min(spl_fixpt_ceil(spl_fixpt_mul_int( - spl_scratch->scl_data.ratios.vert_c, 2)), 8); + spl_scratch->scl_data.taps.v_taps_c = spl_min(2 * spl_fixpt_ceil( + spl_scratch->scl_data.ratios.vert_c), 8); else spl_scratch->scl_data.taps.v_taps_c = 4; } else @@ -932,7 +949,7 @@ static bool spl_get_optimal_number_of_taps( int min_taps_y, min_taps_c; enum lb_memory_config lb_config; bool skip_easf = false; - bool is_ycbcr = spl_dscl_is_video_format(spl_in->basic_in.format); + bool is_subsampled = spl_is_subsampled_format(spl_in->basic_in.format); if (spl_scratch->scl_data.viewport.width > spl_scratch->scl_data.h_active && max_downscale_src_width != 0 && @@ -964,7 +981,7 @@ static bool spl_get_optimal_number_of_taps( if (skip_easf) spl_get_taps_non_adaptive_scaler(spl_scratch, in_taps); else { - if (spl_is_yuv420(spl_in->basic_in.format)) { + if (spl_is_video_format(spl_in->basic_in.format)) { spl_scratch->scl_data.taps.h_taps = 6; spl_scratch->scl_data.taps.v_taps = 6; spl_scratch->scl_data.taps.h_taps_c = 4; @@ -982,8 +999,7 @@ static bool spl_get_optimal_number_of_taps( min_taps_c = spl_fixpt_ceil(spl_scratch->scl_data.ratios.vert_c); /* Use LB_MEMORY_CONFIG_3 for 4:2:0 */ - if ((spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP8) - || (spl_in->basic_in.format == SPL_PIXEL_FORMAT_420BPP10)) + if (spl_is_yuv420(spl_in->basic_in.format)) lb_config = LB_MEMORY_CONFIG_3; else lb_config = LB_MEMORY_CONFIG_0; @@ -1039,13 +1055,11 @@ static bool spl_get_optimal_number_of_taps( if (spl_scratch->scl_data.taps.h_taps_c == 5) spl_scratch->scl_data.taps.h_taps_c = 4; - if (spl_is_yuv420(spl_in->basic_in.format)) { - if ((spl_scratch->scl_data.taps.h_taps <= 4) || - (spl_scratch->scl_data.taps.h_taps_c <= 3)) { + if (spl_is_video_format(spl_in->basic_in.format)) { + if (spl_scratch->scl_data.taps.h_taps <= 4) { *enable_easf_v = false; *enable_easf_h = false; - } else if ((spl_scratch->scl_data.taps.v_taps <= 3) || - (spl_scratch->scl_data.taps.v_taps_c <= 3)) { + } else if (spl_scratch->scl_data.taps.v_taps <= 3) { *enable_easf_v = false; *enable_easf_h = true; } else { @@ -1086,10 +1100,10 @@ static bool spl_get_optimal_number_of_taps( spl_scratch->scl_data.taps.h_taps = 1; 
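The hunks above replace open-coded 4:2:0 checks with the new spl_is_video_format()/spl_is_subsampled_format() helpers, which classify a format purely by comparing it against BEGIN/END sentinels added to the enum. A minimal sketch of that enum-sentinel idiom, with simplified, illustrative names and values rather than the driver's actual definitions:

/* Sketch of the range-check idiom behind spl_is_subsampled_format():
 * the *_BEGIN/*_END sentinels alias the first and last members of the
 * sub-range, so classification is two comparisons and adding a format
 * only requires moving the sentinels. Names here are hypothetical. */
enum fmt {
        FMT_ARGB8888,
        FMT_FP16,
        FMT_420BPP8,
        FMT_420BPP10,
        FMT_SUBSAMPLED_BEGIN = FMT_420BPP8,
        FMT_SUBSAMPLED_END   = FMT_420BPP10,
};

static bool is_subsampled(enum fmt f)
{
        return f >= FMT_SUBSAMPLED_BEGIN && f <= FMT_SUBSAMPLED_END;
}

The enum reordering in dc_spl_types.h, which moves SPL_PIXEL_FORMAT_INVALID after the 4:2:0 entries and their BEGIN/END markers, is what keeps these range checks valid.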
spl_scratch->scl_data.taps.v_taps = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_ycbcr) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c) && !is_subsampled) spl_scratch->scl_data.taps.h_taps_c = 1; - if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_ycbcr) + if (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c) && !is_subsampled) spl_scratch->scl_data.taps.v_taps_c = 1; *enable_easf_v = false; @@ -1103,11 +1117,11 @@ static bool spl_get_optimal_number_of_taps( (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert))) spl_scratch->scl_data.taps.v_taps = 1; - if ((!*enable_easf_h) && !is_ycbcr && + if ((!*enable_easf_h) && !is_subsampled && (IDENTITY_RATIO(spl_scratch->scl_data.ratios.horz_c))) spl_scratch->scl_data.taps.h_taps_c = 1; - if ((!*enable_easf_v) && !is_ycbcr && + if ((!*enable_easf_v) && !is_subsampled && (IDENTITY_RATIO(spl_scratch->scl_data.ratios.vert_c))) spl_scratch->scl_data.taps.v_taps_c = 1; } @@ -1118,7 +1132,7 @@ static bool spl_get_optimal_number_of_taps( static void spl_set_black_color_data(enum spl_pixel_format format, struct scl_black_color *scl_black_color) { - bool ycbcr = spl_dscl_is_video_format(format); + bool ycbcr = spl_is_video_format(format); if (ycbcr) { scl_black_color->offset_rgb_y = BLACK_OFFSET_RGB_Y; scl_black_color->offset_rgb_cbcr = BLACK_OFFSET_CBCR; @@ -1585,7 +1599,7 @@ static void spl_set_easf_data(struct spl_scratch *spl_scratch, struct spl_out *s 0x0; // fp1.5.10, C3 coefficient } - if (spl_is_yuv420(format)) { /* TODO: 0 = RGB, 1 = YUV */ + if (spl_is_video_format(format)) { /* TODO: 0 = RGB, 1 = YUV */ dscl_prog_data->easf_matrix_mode = 1; /* * 2-bit, BF3 chroma mode correction calculation mode diff --git a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h index 55d557df4aa5..467af9dd90de 100644 --- a/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h +++ b/drivers/gpu/drm/amd/display/dc/spl/dc_spl_types.h @@ -63,13 +63,13 @@ enum spl_pixel_format { SPL_PIXEL_FORMAT_420BPP8, SPL_PIXEL_FORMAT_420BPP10, /*end of pixel format definition*/ - SPL_PIXEL_FORMAT_INVALID, - SPL_PIXEL_FORMAT_422BPP8, - SPL_PIXEL_FORMAT_422BPP10, SPL_PIXEL_FORMAT_GRPH_BEGIN = SPL_PIXEL_FORMAT_INDEX8, SPL_PIXEL_FORMAT_GRPH_END = SPL_PIXEL_FORMAT_FP16, + SPL_PIXEL_FORMAT_SUBSAMPLED_BEGIN = SPL_PIXEL_FORMAT_420BPP8, + SPL_PIXEL_FORMAT_SUBSAMPLED_END = SPL_PIXEL_FORMAT_420BPP10, SPL_PIXEL_FORMAT_VIDEO_BEGIN = SPL_PIXEL_FORMAT_420BPP8, SPL_PIXEL_FORMAT_VIDEO_END = SPL_PIXEL_FORMAT_420BPP10, + SPL_PIXEL_FORMAT_INVALID, SPL_PIXEL_FORMAT_UNKNOWN }; @@ -436,8 +436,14 @@ struct basic_in { struct spl_rect clip_rect; // Clip rect enum spl_rotation_angle rotation; // Rotation bool horizontal_mirror; // Horizontal mirror - int mpc_combine_h; // MPC Horizontal Combine Factor (split_count) - int mpc_combine_v; // MPC Vertical Combine Factor (split_idx) + struct { // previous mpc_combine_h - split count + bool use_recout_width_aligned; + union { + int mpc_num_h_slices; + int mpc_recout_width_align; + } num_slices_recout_width; + } num_h_slices_recout_width_align; + int mpc_h_slice_index; // previous mpc_combine_v - split_idx // Inputs for adaptive scaler - TODO enum spl_transfer_func_type tf_type; /* Transfer function type */ enum spl_transfer_func_predefined tf_predefined_type; /* Transfer function predefined type */ diff --git a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h index b353c4ceb60d..4b3ccbca0da2 100644 --- 
a/drivers/gpu/drm/amd/display/dmub/dmub_srv.h +++ b/drivers/gpu/drm/amd/display/dmub/dmub_srv.h @@ -69,6 +69,9 @@ #define DMUB_PC_SNAPSHOT_COUNT 10 +/* Default tracebuffer size if meta is absent. */ +#define DMUB_TRACE_BUFFER_SIZE (64 * 1024) + /* Forward declarations */ struct dmub_srv; struct dmub_srv_common_regs; diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h index b800a507d1e0..59990929e44e 100644 --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h @@ -475,11 +475,23 @@ union replay_hw_flags { * Use TPS3 signal when restore main link. */ uint32_t force_wakeup_by_tps3 : 1; + /** + * @is_alpm_initialized: Indicates whether ALPM is initialized + */ + uint32_t is_alpm_initialized : 1; } bitfields; uint32_t u32All; }; +union fw_assisted_mclk_switch_version { + struct { + uint8_t minor : 5; + uint8_t major : 3; + }; + uint8_t ver; +}; + /** * DMUB feature capabilities. * After DMUB init, driver will query FW capabilities prior to enabling certain features. @@ -1823,52 +1835,11 @@ enum fams2_stream_type { FAMS2_STREAM_TYPE_SUBVP = 4, }; -/* dynamic stream state */ -struct dmub_fams2_legacy_stream_dynamic_state { - uint8_t force_allow_at_vblank; - uint8_t pad[3]; -}; - -struct dmub_fams2_subvp_stream_dynamic_state { - uint16_t viewport_start_hubp_vline; - uint16_t viewport_height_hubp_vlines; - uint16_t viewport_start_c_hubp_vline; - uint16_t viewport_height_c_hubp_vlines; - uint16_t phantom_viewport_height_hubp_vlines; - uint16_t phantom_viewport_height_c_hubp_vlines; - uint16_t microschedule_start_otg_vline; - uint16_t mall_start_otg_vline; - uint16_t mall_start_hubp_vline; - uint16_t mall_start_c_hubp_vline; - uint8_t force_allow_at_vblank_only; - uint8_t pad[3]; -}; - -struct dmub_fams2_drr_stream_dynamic_state { - uint16_t stretched_vtotal; - uint8_t use_cur_vtotal; - uint8_t pad; -}; - -struct dmub_fams2_stream_dynamic_state { - uint64_t ref_tick; - uint32_t cur_vtotal; - uint16_t adjusted_allow_end_otg_vline; - uint8_t pad[2]; - struct dmub_optc_position ref_otg_pos; - struct dmub_optc_position target_otg_pos; - union { - struct dmub_fams2_legacy_stream_dynamic_state legacy; - struct dmub_fams2_subvp_stream_dynamic_state subvp; - struct dmub_fams2_drr_stream_dynamic_state drr; - } sub_state; -}; - /* static stream state */ struct dmub_fams2_legacy_stream_static_state { uint8_t vactive_det_fill_delay_otg_vlines; uint8_t programming_delay_otg_vlines; -}; +}; //v0 struct dmub_fams2_subvp_stream_static_state { uint16_t vratio_numerator; @@ -1887,14 +1858,59 @@ struct dmub_fams2_subvp_stream_static_state { uint8_t phantom_otg_inst; uint8_t phantom_pipe_mask; uint8_t phantom_plane_pipe_masks[DMUB_MAX_PHANTOM_PLANES]; // phantom pipe mask per plane (for flip passthrough) -}; +}; //v0 struct dmub_fams2_drr_stream_static_state { uint16_t nom_stretched_vtotal; uint8_t programming_delay_otg_vlines; uint8_t only_stretch_if_required; uint8_t pad[2]; -}; +}; //v0 + +struct dmub_fams2_cmd_legacy_stream_static_state { + uint16_t vactive_det_fill_delay_otg_vlines; + uint16_t programming_delay_otg_vlines; +}; //v1 + +struct dmub_fams2_cmd_subvp_stream_static_state { + uint16_t vratio_numerator; + uint16_t vratio_denominator; + uint16_t phantom_vtotal; + uint16_t phantom_vactive; + uint16_t programming_delay_otg_vlines; + uint16_t prefetch_to_mall_otg_vlines; + union { + struct { + uint8_t is_multi_planar : 1; + uint8_t is_yuv420 : 1; + } bits; + uint8_t all; + } config; + uint8_t 
phantom_otg_inst; + uint8_t phantom_pipe_mask; + uint8_t pad0; + uint8_t phantom_plane_pipe_masks[DMUB_MAX_PHANTOM_PLANES]; // phantom pipe mask per plane (for flip passthrough) + uint8_t pad1[4 - (DMUB_MAX_PHANTOM_PLANES % 4)]; +}; //v1 + +struct dmub_fams2_cmd_drr_stream_static_state { + uint16_t nom_stretched_vtotal; + uint16_t programming_delay_otg_vlines; + uint8_t only_stretch_if_required; + uint8_t pad[3]; +}; //v1 + +union dmub_fams2_stream_static_sub_state { + struct dmub_fams2_legacy_stream_static_state legacy; + struct dmub_fams2_subvp_stream_static_state subvp; + struct dmub_fams2_drr_stream_static_state drr; +}; //v0 + +union dmub_fams2_cmd_stream_static_sub_state { + struct dmub_fams2_cmd_legacy_stream_static_state legacy; + struct dmub_fams2_cmd_subvp_stream_static_state subvp; + struct dmub_fams2_cmd_drr_stream_static_state drr; +}; //v1 struct dmub_fams2_stream_static_state { enum fams2_stream_type type; @@ -1924,13 +1940,45 @@ struct dmub_fams2_stream_static_state { uint8_t pipe_mask; // pipe mask for the whole config uint8_t num_planes; uint8_t plane_pipe_masks[DMUB_MAX_PLANES]; // pipe mask per plane (for flip passthrough) - uint8_t pad[DMUB_MAX_PLANES % 4]; + uint8_t pad[4 - (DMUB_MAX_PLANES % 4)]; + union dmub_fams2_stream_static_sub_state sub_state; +}; //v0 + +struct dmub_fams2_cmd_stream_static_base_state { + enum fams2_stream_type type; + uint32_t otg_vline_time_ns; + uint32_t otg_vline_time_ticks; + uint16_t htotal; + uint16_t vtotal; // nominal vtotal + uint16_t vblank_start; + uint16_t vblank_end; + uint16_t max_vtotal; + uint16_t allow_start_otg_vline; + uint16_t allow_end_otg_vline; + uint16_t drr_keepout_otg_vline; // after this vline, vtotal cannot be changed + uint16_t scheduling_delay_otg_vlines; // min time to budget for ready to microschedule start + uint16_t contention_delay_otg_vlines; // time to budget for contention on execution + uint16_t vline_int_ack_delay_otg_vlines; // min time to budget for vertical interrupt firing + uint16_t allow_to_target_delay_otg_vlines; // time from allow vline to target vline union { - struct dmub_fams2_legacy_stream_static_state legacy; - struct dmub_fams2_subvp_stream_static_state subvp; - struct dmub_fams2_drr_stream_static_state drr; - } sub_state; -}; + struct { + uint8_t is_drr : 1; // stream is DRR enabled + uint8_t clamp_vtotal_min : 1; // clamp vtotal to min instead of nominal + uint8_t min_ttu_vblank_usable : 1; // if min ttu vblank is above wm, no force pstate is needed in blank + } bits; + uint8_t all; + } config; + uint8_t otg_inst; + uint8_t pipe_mask; // pipe mask for the whole config + uint8_t num_planes; + uint8_t plane_pipe_masks[DMUB_MAX_PLANES]; // pipe mask per plane (for flip passthrough) + uint8_t pad[4 - (DMUB_MAX_PLANES % 4)]; +}; //v1 + +struct dmub_fams2_stream_static_state_v1 { + struct dmub_fams2_cmd_stream_static_base_state base; + union dmub_fams2_cmd_stream_static_sub_state sub_state; +}; //v1 /** * enum dmub_fams2_allow_delay_check_mode - macroscheduler mode for breaking on excessive @@ -1970,7 +2018,11 @@ struct dmub_cmd_fams2_global_config { union dmub_cmd_fams2_config { struct dmub_cmd_fams2_global_config global; - struct dmub_fams2_stream_static_state stream; + struct dmub_fams2_stream_static_state stream; //v0 + union { + struct dmub_fams2_cmd_stream_static_base_state base; + union dmub_fams2_cmd_stream_static_sub_state sub_state; + } stream_v1; //v1 }; /** diff --git a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c index 
a3f3ff5d49ac..15ea216e903d 100644 --- a/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c +++ b/drivers/gpu/drm/amd/display/dmub/src/dmub_srv.c @@ -61,10 +61,6 @@ /* Default state size if meta is absent. */ #define DMUB_FW_STATE_SIZE (64 * 1024) -/* Default tracebuffer size if meta is absent. */ -#define DMUB_TRACE_BUFFER_SIZE (64 * 1024) - - /* Default scratch mem size. */ #define DMUB_SCRATCH_MEM_SIZE (1024) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 7eefcb0f5070..98d9e840b0e2 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -401,9 +401,9 @@ struct amd_ip_funcs { int (*pre_soft_reset)(struct amdgpu_ip_block *ip_block); int (*soft_reset)(struct amdgpu_ip_block *ip_block); int (*post_soft_reset)(struct amdgpu_ip_block *ip_block); - int (*set_clockgating_state)(void *handle, + int (*set_clockgating_state)(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); - int (*set_powergating_state)(void *handle, + int (*set_powergating_state)(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); void (*get_clockgating_state)(void *handle, u64 *flags); void (*dump_ip_state)(struct amdgpu_ip_block *ip_block); diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h index cae1a7e74323..73c5dd5e83d4 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_offset.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_offset.h @@ -19,8 +19,8 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef _dcn_2_0_3_OFFSET_HEADER -#define _dcn_2_0_3_OFFSET_HEADER +#ifndef _dcn_2_0_1_OFFSET_HEADER +#define _dcn_2_0_1_OFFSET_HEADER // addressBlock: dce_dc_dccg_dccg_dispdec diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h index ca1e1eb39256..290d807800a6 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_3_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_2_0_1_sh_mask.h @@ -18,8 +18,8 @@ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ -#ifndef _dcn_2_0_3_SH_MASK_HEADER -#define _dcn_2_0_3_SH_MASK_HEADER +#ifndef _dcn_2_0_1_SH_MASK_HEADER +#define _dcn_2_0_1_SH_MASK_HEADER // addressBlock: dce_dc_dccg_dccg_dispdec diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h new file mode 100644 index 000000000000..0e8f12728d5f --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_offset.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _umc_8_14_0_OFFSET_HEADER +#define _umc_8_14_0_OFFSET_HEADER + +#define regUMCCH0_GeccErrCntSel 0x0328 +#define regUMCCH0_GeccErrCntSel_BASE_IDX 0 +#define regUMCCH0_GeccErrCnt 0x0329 +#define regUMCCH0_GeccErrCnt_BASE_IDX 0 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h new file mode 100644 index 000000000000..5d723b5d9b87 --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/umc/umc_8_14_0_sh_mask.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef _umc_8_14_0_SH_MASK_HEADER +#define _umc_8_14_0_SH_MASK_HEADER + +//UMCCH0_GeccErrCntSel +#define UMCCH0_GeccErrCntSel__GeccErrInt__SHIFT 0xc +#define UMCCH0_GeccErrCntSel__GeccErrCntEn__SHIFT 0xf +#define UMCCH0_GeccErrCntSel__PoisonCntEn__SHIFT 0x10 +#define UMCCH0_GeccErrCntSel__GeccErrInt_MASK 0x00003000L +#define UMCCH0_GeccErrCntSel__GeccErrCntEn_MASK 0x00008000L +#define UMCCH0_GeccErrCntSel__PoisonCntEn_MASK 0x00030000L +//UMCCH0_GeccErrCnt +#define UMCCH0_GeccErrCnt__GeccErrCnt__SHIFT 0x0 +#define UMCCH0_GeccErrCnt__GeccUnCorrErrCnt__SHIFT 0x10 +#define UMCCH0_GeccErrCnt__GeccErrCnt_MASK 0x0000FFFFL +#define UMCCH0_GeccErrCnt__GeccUnCorrErrCnt_MASK 0xFFFF0000L + +#endif diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h index b0fc22383e28..0160d65f3f5e 100644 --- a/drivers/gpu/drm/amd/include/atomfirmware.h +++ b/drivers/gpu/drm/amd/include/atomfirmware.h @@ -1300,12 +1300,17 @@ struct atom_ext_display_path //usCaps enum ext_display_path_cap_def { - EXT_DISPLAY_PATH_CAPS__HBR2_DISABLE = 0x0001, - EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = 0x0002, - EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007C, - EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x01 << 2), //PI redriver chip - EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x02 << 2), //TI retimer chip - EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 = (0x03 << 2) //Parade DP->HDMI recoverter chip + EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007E, + AMD_EXT_DISPLAY_PATH_CAPS__EXT_CHIP_MASK = 0x007E, + AMD_EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = (0x01 << 1), + AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x02 << 1), + AMD_EXT_DISPLAY_PATH_CAPS__DP_EARLY_8B10B_TPS2 = (0x03 << 1), + AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x04 << 1), + AMD_EXT_DISPLAY_PATH_CAPS__HDMI20_PARADE_PS175 = (0x06 << 1), + EXT_DISPLAY_PATH_CAPS__DP_FIXED_VS_EN = (0x07 << 1), + EXT_DISPLAY_PATH_CAPS__HDMI20_PI3EQX1204 = (0x08 << 1), //PI redriver chip + EXT_DISPLAY_PATH_CAPS__HDMI20_TISN65DP159RSBT = (0x09 << 1), //TI retimer chip + EXT_DISPLAY_PATH_CAPS__AMD_INTERNAL = (0x0a << 1), //AMD internal customer chip placeholder }; struct atom_external_display_connection_info diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h new file mode 100644 index 000000000000..64b553e7de1a --- /dev/null +++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_5_0.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __IRQSRCS_VCN_5_0_H__ +#define __IRQSRCS_VCN_5_0_H__ + +#define VCN_5_0__SRCID__UVD_TRAP 114 // 0x72 UVD_TRAP +#define VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE 119 // 0x77 Encoder General Purpose +#define VCN_5_0__SRCID__UVD_ENC_LOW_LATENCY 120 // 0x78 Encoder Low Latency +#define VCN_5_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT 124 // 0x7c UVD system message interrupt +#define VCN_5_0__SRCID__JPEG_ENCODE 151 // 0x97 JRBC Encode interrupt +#define VCN_5_0__SRCID__JPEG_DECODE 153 // 0x99 JRBC Decode interrupt +#define VCN_5_0__SRCID__JPEG1_DECODE 149 // 0x95 JRBC1 Decode interrupt +#define VCN_5_0__SRCID__JPEG2_DECODE 151 // 0x97 JRBC2 Decode interrupt +#define VCN_5_0__SRCID__JPEG3_DECODE 171 // 0xab JRBC3 Decode interrupt +#define VCN_5_0__SRCID__JPEG4_DECODE 172 // 0xac JRBC4 Decode interrupt +#define VCN_5_0__SRCID__JPEG5_DECODE 173 // 0xad JRBC5 Decode interrupt +#define VCN_5_0__SRCID__JPEG6_DECODE 174 // 0xae JRBC6 Decode interrupt +#define VCN_5_0__SRCID__JPEG7_DECODE 175 // 0xaf JRBC7 Decode interrupt +#define VCN_5_0__SRCID__JPEG8_DECODE 177 // 0xb1 JRBC8 Decode interrupt +#define VCN_5_0__SRCID__JPEG9_DECODE 178 // 0xb2 JRBC9 Decode interrupt + +#define VCN_5_0__SRCID_UVD_POISON 160 +#define VCN_5_0__SRCID_DJPEG0_POISON 161 +#define VCN_5_0__SRCID_EJPEG0_POISON 162 +#endif diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index 67a5de573943..9189dcb65188 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -164,6 +164,7 @@ enum amd_pp_task { }; enum PP_SMC_POWER_PROFILE { + PP_SMC_POWER_PROFILE_UNKNOWN = -1, PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0, PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1, PP_SMC_POWER_PROFILE_POWERSAVING = 0x2, @@ -420,7 +421,9 @@ struct amd_pm_funcs { int (*load_firmware)(void *handle); int (*wait_for_fw_loading_complete)(void *handle); int (*set_powergating_by_smu)(void *handle, - uint32_t block_type, bool gate); + uint32_t block_type, + bool gate, + int inst); int (*set_clockgating_by_smu)(void *handle, uint32_t msg_id); int (*set_power_limit)(void *handle, uint32_t n); int (*get_power_limit)(void *handle, uint32_t *limit, diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 9dc82f4d7c93..6a9e26905edf 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -70,13 +70,18 @@ int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low) return ret; } -int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block_type, bool gate) +int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, + uint32_t block_type, + bool gate, + int inst) { int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; enum ip_power_state pwr_state = gate ? POWER_STATE_OFF : POWER_STATE_ON; + bool is_vcn = (block_type == AMD_IP_BLOCK_TYPE_UVD || block_type == AMD_IP_BLOCK_TYPE_VCN); - if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state) { + if (atomic_read(&adev->pm.pwr_state[block_type]) == pwr_state && + (!is_vcn || adev->vcn.num_vcn_inst == 1)) { dev_dbg(adev->dev, "IP block%d already in the target %s state!", block_type, gate ? 
"gate" : "ungate"); return 0; @@ -88,7 +93,6 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_UVD: case AMD_IP_BLOCK_TYPE_VCE: case AMD_IP_BLOCK_TYPE_GFX: - case AMD_IP_BLOCK_TYPE_VCN: case AMD_IP_BLOCK_TYPE_SDMA: case AMD_IP_BLOCK_TYPE_JPEG: case AMD_IP_BLOCK_TYPE_GMC: @@ -96,7 +100,12 @@ int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block case AMD_IP_BLOCK_TYPE_VPE: if (pp_funcs && pp_funcs->set_powergating_by_smu) ret = (pp_funcs->set_powergating_by_smu( - (adev)->powerplay.pp_handle, block_type, gate)); + (adev)->powerplay.pp_handle, block_type, gate, 0)); + break; + case AMD_IP_BLOCK_TYPE_VCN: + if (pp_funcs && pp_funcs->set_powergating_by_smu) + ret = (pp_funcs->set_powergating_by_smu( + (adev)->powerplay.pp_handle, block_type, gate, inst)); break; default: break; @@ -566,7 +575,17 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) return; } - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable, 0); + if (ret) + DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", + enable ? "enable" : "disable", ret); +} + +void amdgpu_dpm_enable_vcn(struct amdgpu_device *adev, bool enable, int inst) +{ + int ret = 0; + + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCN, !enable, inst); if (ret) DRM_ERROR("Dpm %s uvd failed, ret = %d. \n", enable ? "enable" : "disable", ret); @@ -591,7 +610,7 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) return; } - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable, 0); if (ret) DRM_ERROR("Dpm %s vce failed, ret = %d. \n", enable ? "enable" : "disable", ret); @@ -601,7 +620,7 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_JPEG, !enable, 0); if (ret) DRM_ERROR("Dpm %s jpeg failed, ret = %d. \n", enable ? "enable" : "disable", ret); @@ -611,7 +630,7 @@ void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable) { int ret = 0; - ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable); + ret = amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VPE, !enable, 0); if (ret) DRM_ERROR("Dpm %s vpe failed, ret = %d.\n", enable ? 
"enable" : "disable", ret); @@ -700,6 +719,21 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev) return ret; } +int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask) +{ + struct smu_context *smu = adev->powerplay.pp_handle; + int ret; + + if (!is_support_sw_smu(adev)) + return -EOPNOTSUPP; + + mutex_lock(&adev->pm.mutex); + ret = smu_reset_sdma(smu, inst_mask); + mutex_unlock(&adev->pm.mutex); + + return ret; +} + int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t *min, @@ -953,6 +987,24 @@ enum amd_dpm_forced_level amdgpu_dpm_get_performance_level(struct amdgpu_device return level; } +static void amdgpu_dpm_enter_umd_state(struct amdgpu_device *adev) +{ + /* enter UMD Pstate */ + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_UNGATE); + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_UNGATE); +} + +static void amdgpu_dpm_exit_umd_state(struct amdgpu_device *adev) +{ + /* exit UMD Pstate */ + amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_CG_STATE_GATE); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, + AMD_PG_STATE_GATE); +} + int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev, enum amd_dpm_forced_level level) { @@ -973,6 +1025,10 @@ int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev, if (current_level == level) return 0; + if (!(current_level & profile_mode_mask) && + (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) + return -EINVAL; + if (adev->asic_type == CHIP_RAVEN) { if (!(adev->apu_flags & AMD_APU_IS_RAVEN2)) { if (current_level != AMD_DPM_FORCED_LEVEL_MANUAL && @@ -984,35 +1040,25 @@ int amdgpu_dpm_force_performance_level(struct amdgpu_device *adev, } } - if (!(current_level & profile_mode_mask) && - (level == AMD_DPM_FORCED_LEVEL_PROFILE_EXIT)) - return -EINVAL; - - if (!(current_level & profile_mode_mask) && - (level & profile_mode_mask)) { - /* enter UMD Pstate */ - amdgpu_device_ip_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_PG_STATE_UNGATE); - amdgpu_device_ip_set_clockgating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_CG_STATE_UNGATE); - } else if ((current_level & profile_mode_mask) && - !(level & profile_mode_mask)) { - /* exit UMD Pstate */ - amdgpu_device_ip_set_clockgating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_CG_STATE_GATE); - amdgpu_device_ip_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_PG_STATE_GATE); - } + if (!(current_level & profile_mode_mask) && (level & profile_mode_mask)) + amdgpu_dpm_enter_umd_state(adev); + else if ((current_level & profile_mode_mask) && + !(level & profile_mode_mask)) + amdgpu_dpm_exit_umd_state(adev); mutex_lock(&adev->pm.mutex); if (pp_funcs->force_performance_level(adev->powerplay.pp_handle, level)) { mutex_unlock(&adev->pm.mutex); + /* If new level failed, retain the umd state as before */ + if (!(current_level & profile_mode_mask) && + (level & profile_mode_mask)) + amdgpu_dpm_exit_umd_state(adev); + else if ((current_level & profile_mode_mask) && + !(level & profile_mode_mask)) + amdgpu_dpm_enter_umd_state(adev); + return -EINVAL; } diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h index 363af8990aa2..1f5ac7e0230d 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h @@ -397,7 +397,7 @@ int amdgpu_dpm_get_apu_thermal_limit(struct amdgpu_device *adev, uint32_t *limit int 
amdgpu_dpm_set_apu_thermal_limit(struct amdgpu_device *adev, uint32_t limit); int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, - uint32_t block_type, bool gate); + uint32_t block_type, bool gate, int inst); extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low); @@ -446,6 +446,7 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev); void amdgpu_dpm_compute_clocks(struct amdgpu_device *adev); void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable); +void amdgpu_dpm_enable_vcn(struct amdgpu_device *adev, bool enable, int inst); void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable); void amdgpu_dpm_enable_vpe(struct amdgpu_device *adev, bool enable); @@ -601,5 +602,6 @@ int amdgpu_dpm_set_pm_policy(struct amdgpu_device *adev, int policy_type, int policy_level); ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev, enum pp_pm_policy p_type, char *buf); +int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask); #endif diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c index 8908646ad620..67a8e22b1126 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/kv_dpm.c @@ -3177,13 +3177,13 @@ static int kv_dpm_process_interrupt(struct amdgpu_device *adev, return 0; } -static int kv_dpm_set_clockgating_state(void *handle, +static int kv_dpm_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int kv_dpm_set_powergating_state(void *handle, +static int kv_dpm_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -3276,7 +3276,9 @@ static int kv_dpm_read_sensor(void *handle, int idx, } static int kv_set_powergating_by_smu(void *handle, - uint32_t block_type, bool gate) + uint32_t block_type, + bool gate, + int inst) { switch (block_type) { case AMD_IP_BLOCK_TYPE_UVD: diff --git a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c index ee23a0f897c5..a87dcf0974bc 100644 --- a/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c +++ b/drivers/gpu/drm/amd/pm/legacy-dpm/si_dpm.c @@ -7709,7 +7709,8 @@ static int si_dpm_init_microcode(struct amdgpu_device *adev) default: BUG(); } - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s_smc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_smc.bin", chip_name); if (err) { DRM_ERROR("si_smc: Failed to load firmware. 
err = %d\"%s_smc.bin\"\n", err, chip_name); @@ -7849,13 +7850,13 @@ static int si_dpm_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int si_dpm_set_clockgating_state(void *handle, +static int si_dpm_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int si_dpm_set_powergating_state(void *handle, +static int si_dpm_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c index 26624a716fc6..686345f75f26 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c @@ -244,7 +244,7 @@ static bool pp_is_idle(void *handle) return false; } -static int pp_set_powergating_state(void *handle, +static int pp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -267,7 +267,7 @@ static int pp_resume(struct amdgpu_ip_block *ip_block) return hwmgr_resume(hwmgr); } -static int pp_set_clockgating_state(void *handle, +static int pp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; @@ -1227,7 +1227,9 @@ static void pp_dpm_powergate_sdma(void *handle, bool gate) } static int pp_set_powergating_by_smu(void *handle, - uint32_t block_type, bool gate) + uint32_t block_type, + bool gate, + int inst) { int ret = 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c index fe24219c3bf4..4bd92fd782be 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppatomctrl.c @@ -992,6 +992,8 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr GetIndexIntoMasterTable(DATA, SMU_Info), &size, &frev, &crev); + if (!psmu_info) + return -EINVAL; for (i = 0; i < psmu_info->ucSclkEntryNum; i++) { table->entry[i].ucVco_setting = psmu_info->asSclkFcwRangeEntry[i].ucVco_setting; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c index 3007b054c873..776d58ea63ae 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_powertune.c @@ -1120,13 +1120,14 @@ static int vega10_enable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) result = vega10_program_didt_config_registers(hwmgr, SEEDCForceStallPatternConfig_Vega10, VEGA10_CONFIGREG_DIDT); result |= vega10_program_didt_config_registers(hwmgr, SEEDCCtrlForceStallConfig_Vega10, VEGA10_CONFIGREG_DIDT); if (0 != result) - return result; + goto exit_safe_mode; vega10_didt_set_mask(hwmgr, false); +exit_safe_mode: amdgpu_gfx_rlc_exit_safe_mode(adev, 0); - return 0; + return result; } static int vega10_disable_se_edc_force_stall_config(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 5eae14fe79f1..8ca793c222ff 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -238,7 +238,8 @@ static bool is_vcn_enabled(struct amdgpu_device *adev) } static int smu_dpm_set_vcn_enable(struct smu_context *smu, - bool enable) + bool enable, + int inst) { struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; @@ -253,12 +254,12 @@ static 
int smu_dpm_set_vcn_enable(struct smu_context *smu, if (!smu->ppt_funcs->dpm_set_vcn_enable) return 0; - if (atomic_read(&power_gate->vcn_gated) ^ enable) + if (atomic_read(&power_gate->vcn_gated[inst]) ^ enable) return 0; - ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable, 0xff); + ret = smu->ppt_funcs->dpm_set_vcn_enable(smu, enable, inst); if (!ret) - atomic_set(&power_gate->vcn_gated, !enable); + atomic_set(&power_gate->vcn_gated[inst], !enable); return ret; } @@ -345,8 +346,9 @@ static int smu_set_mall_enable(struct smu_context *smu) * smu_dpm_set_power_gate - power gate/ungate the specific IP block * * @handle: smu_context pointer - * @block_type: the IP block to power gate/ungate - * @gate: to power gate if true, ungate otherwise + * @block_type: the IP block to power gate/ungate + * @gate: to power gate if true, ungate otherwise + * @inst: the instance of the IP block to power gate/ungate * * This API uses no smu->mutex lock protection due to: * 1. It is either called by other IP block(gfx/sdma/vcn/uvd/vce). @@ -357,7 +359,8 @@ static int smu_set_mall_enable(struct smu_context *smu) */ static int smu_dpm_set_power_gate(void *handle, uint32_t block_type, - bool gate) + bool gate, + int inst) { struct smu_context *smu = handle; int ret = 0; @@ -376,10 +379,10 @@ static int smu_dpm_set_power_gate(void *handle, */ case AMD_IP_BLOCK_TYPE_UVD: case AMD_IP_BLOCK_TYPE_VCN: - ret = smu_dpm_set_vcn_enable(smu, !gate); + ret = smu_dpm_set_vcn_enable(smu, !gate, inst); if (ret) - dev_err(smu->adev->dev, "Failed to power %s VCN!\n", - gate ? "gate" : "ungate"); + dev_err(smu->adev->dev, "Failed to power %s VCN instance %d!\n", + gate ? "gate" : "ungate", inst); break; case AMD_IP_BLOCK_TYPE_GFX: ret = smu_gfx_off_control(smu, gate); @@ -724,6 +727,7 @@ static int smu_set_funcs(struct amdgpu_device *adev) break; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 12): smu_v13_0_6_set_ppt_funcs(smu); /* Enable pp_od_clk_voltage node */ smu->od_enabled = true; @@ -764,6 +768,7 @@ static int smu_early_init(struct amdgpu_ip_block *ip_block) smu->smu_baco.platform_support = false; smu->smu_baco.maco_support = false; smu->user_dpm_profile.fan_mode = -1; + smu->power_profile_mode = PP_SMC_POWER_PROFILE_UNKNOWN; mutex_init(&smu->message_lock); @@ -781,21 +786,25 @@ static int smu_set_default_dpm_table(struct smu_context *smu) struct amdgpu_device *adev = smu->adev; struct smu_power_context *smu_power = &smu->smu_power; struct smu_power_gate *power_gate = &smu_power->power_gate; - int vcn_gate, jpeg_gate; + int vcn_gate[AMDGPU_MAX_VCN_INSTANCES], jpeg_gate, i; int ret = 0; if (!smu->ppt_funcs->set_default_dpm_table) return 0; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN) - vcn_gate = atomic_read(&power_gate->vcn_gated); + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + vcn_gate[i] = atomic_read(&power_gate->vcn_gated[i]); + } if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) jpeg_gate = atomic_read(&power_gate->jpeg_gated); if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { - ret = smu_dpm_set_vcn_enable(smu, true); - if (ret) - return ret; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + ret = smu_dpm_set_vcn_enable(smu, true, i); + if (ret) + return ret; + } } if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) { @@ -812,8 +821,10 @@ static int smu_set_default_dpm_table(struct smu_context *smu) if (adev->pg_flags & AMD_PG_SUPPORT_JPEG) smu_dpm_set_jpeg_enable(smu, !jpeg_gate); err_out: - if (adev->pg_flags & AMD_PG_SUPPORT_VCN) - 
smu_dpm_set_vcn_enable(smu, !vcn_gate); + if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + smu_dpm_set_vcn_enable(smu, !vcn_gate[i], i); + } return ret; } @@ -1248,11 +1259,26 @@ static bool smu_is_workload_profile_available(struct smu_context *smu, return smu->workload_map && smu->workload_map[profile].valid_mapping; } +static void smu_init_power_profile(struct smu_context *smu) +{ + if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_UNKNOWN) { + if (smu->is_apu || + !smu_is_workload_profile_available( + smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) + smu->power_profile_mode = + PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; + else + smu->power_profile_mode = + PP_SMC_POWER_PROFILE_FULLSCREEN3D; + } + smu_power_profile_mode_get(smu, smu->power_profile_mode); +} + static int smu_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; - int ret; + int i, ret; smu->pool_size = adev->pm.smu_prv_buffer_size; smu->smu_feature.feature_num = SMU_FEATURE_MAX; @@ -1264,18 +1290,13 @@ static int smu_sw_init(struct amdgpu_ip_block *ip_block) atomic64_set(&smu->throttle_int_counter, 0); smu->watermarks_bitmap = 0; - atomic_set(&smu->smu_power.power_gate.vcn_gated, 1); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + atomic_set(&smu->smu_power.power_gate.vcn_gated[i], 1); atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1); atomic_set(&smu->smu_power.power_gate.vpe_gated, 1); atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1); - if (smu->is_apu || - !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D)) - smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; - else - smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D; - smu_power_profile_mode_get(smu, smu->power_profile_mode); - + smu_init_power_profile(smu); smu->display_config = &adev->pm.pm_display_cfg; smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO; @@ -1800,7 +1821,7 @@ static int smu_start_smc_engine(struct smu_context *smu) static int smu_hw_init(struct amdgpu_ip_block *ip_block) { - int ret; + int i, ret; struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; @@ -1826,7 +1847,8 @@ static int smu_hw_init(struct amdgpu_ip_block *ip_block) ret = smu_set_gfx_imu_enable(smu); if (ret) return ret; - smu_dpm_set_vcn_enable(smu, true); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + smu_dpm_set_vcn_enable(smu, true, i); smu_dpm_set_jpeg_enable(smu, true); smu_dpm_set_vpe_enable(smu, true); smu_dpm_set_umsch_mm_enable(smu, true); @@ -2024,12 +2046,13 @@ static int smu_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct smu_context *smu = adev->powerplay.pp_handle; - int ret; + int i, ret; if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) return 0; - smu_dpm_set_vcn_enable(smu, false); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + smu_dpm_set_vcn_enable(smu, false, i); smu_dpm_set_jpeg_enable(smu, false); smu_dpm_set_vpe_enable(smu, false); smu_dpm_set_umsch_mm_enable(smu, false); @@ -2181,13 +2204,13 @@ static int smu_display_configuration_change(void *handle, return 0; } -static int smu_set_clockgating_state(void *handle, +static int smu_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int smu_set_powergating_state(void *handle, +static int smu_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state 
state) { return 0; @@ -2969,9 +2992,10 @@ static int smu_read_sensor(void *handle, int *size_arg) { struct smu_context *smu = handle; + struct amdgpu_device *adev = smu->adev; struct smu_umd_pstate_table *pstate_table = &smu->pstate_table; - int ret = 0; + int i, ret = 0; uint32_t *size, size_val; if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) @@ -3017,7 +3041,13 @@ static int smu_read_sensor(void *handle, *size = 4; break; case AMDGPU_PP_SENSOR_VCN_POWER_STATE: - *(uint32_t *)data = atomic_read(&smu->smu_power.power_gate.vcn_gated) ? 0 : 1; + *(uint32_t *)data = 0; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (!atomic_read(&smu->smu_power.power_gate.vcn_gated[i])) { + *(uint32_t *)data = 1; + break; + } + } *size = 4; break; case AMDGPU_PP_SENSOR_MIN_FAN_RPM: @@ -3885,3 +3915,13 @@ int smu_send_rma_reason(struct smu_context *smu) return ret; } + +int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask) +{ + int ret = 0; + + if (smu->ppt_funcs && smu->ppt_funcs->reset_sdma) + ret = smu->ppt_funcs->reset_sdma(smu, inst_mask); + + return ret; +} diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h index 3925815358ce..3630593bce61 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h @@ -399,7 +399,7 @@ struct smu_dpm_context { struct smu_power_gate { bool uvd_gated; bool vce_gated; - atomic_t vcn_gated; + atomic_t vcn_gated[AMDGPU_MAX_VCN_INSTANCES]; atomic_t jpeg_gated; atomic_t vpe_gated; atomic_t umsch_mm_gated; @@ -1373,6 +1373,11 @@ struct pptable_funcs { int (*send_rma_reason)(struct smu_context *smu); /** + * @reset_sdma: message SMU to soft reset sdma instance. + */ + int (*reset_sdma)(struct smu_context *smu, uint32_t inst_mask); + + /** * @get_ecc_table: message SMU to get ECC INFO table. 
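The new @reset_sdma hook is reached through amdgpu_dpm_reset_sdma() and smu_reset_sdma(), which treat the callback as optional and pass the caller's inst_mask straight through to the ResetSDMA SMU message. Assuming inst_mask carries one bit per SDMA instance (an inference from the name and the "mask 0x%x" error string, not spelled out in the patch), a hypothetical caller might look like:

/* Hypothetical usage sketch, not part of the patch: request a soft reset
 * of SDMA instances 0 and 2. amdgpu_dpm_reset_sdma() returns -EOPNOTSUPP
 * when the software-SMU path is unavailable, and the SMU 13.0.6 backend
 * silently succeeds on firmware older than 85.121.00. */
static int example_reset_sdma_0_and_2(struct amdgpu_device *adev)
{
        uint32_t inst_mask = BIT(0) | BIT(2);
        int r;

        r = amdgpu_dpm_reset_sdma(adev, inst_mask);
        if (r)
                dev_warn(adev->dev, "SDMA soft reset failed: %d\n", r);

        return r;
}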
*/ ssize_t (*get_ecc_info)(struct smu_context *smu, void *table); @@ -1631,6 +1636,7 @@ void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev); int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size); int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size); int smu_send_rma_reason(struct smu_context *smu); +int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask); int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type, int level); ssize_t smu_get_pm_policy_info(struct smu_context *smu, diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h index 41cb681927e2..147bfb12fd75 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h @@ -93,7 +93,8 @@ #define PPSMC_MSG_SelectPLPDMode 0x40 #define PPSMC_MSG_RmaDueToBadPageThreshold 0x43 #define PPSMC_MSG_SelectPstatePolicy 0x44 -#define PPSMC_Message_Count 0x45 +#define PPSMC_MSG_ResetSDMA 0x4D +#define PPSMC_Message_Count 0x4E //PPSMC Reset Types for driver msg argument #define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1 diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h index a299dc4a8071..e4cd6a0d13da 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h @@ -275,7 +275,8 @@ __SMU_DUMMY_MAP(RmaDueToBadPageThreshold), \ __SMU_DUMMY_MAP(SelectPstatePolicy), \ __SMU_DUMMY_MAP(MALLPowerController), \ - __SMU_DUMMY_MAP(MALLPowerState), + __SMU_DUMMY_MAP(MALLPowerState), \ + __SMU_DUMMY_MAP(ResetSDMA), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 286777ada1df..19a25fdc2f5b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1157,19 +1157,15 @@ static int sienna_cichlid_dpm_set_vcn_enable(struct smu_context *smu, int inst) { struct amdgpu_device *adev = smu->adev; - int i, ret = 0; + int ret = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - /* vcn dpm on is a prerequisite for vcn power gate messages */ - if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - 0x10000 * i, NULL); - if (ret) - return ret; - } + if (adev->vcn.harvest_config & (1 << inst)) + return ret; + /* vcn dpm on is a prerequisite for vcn power gate messages */ + if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_MM_DPM_PG_BIT)) { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? 
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + 0x10000 * inst, NULL); } return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 480cf3cb204d..189c6a32b6bd 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -105,7 +105,8 @@ int smu_v11_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index 2bfea740dace..7bb45ff6d5c8 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -103,7 +103,8 @@ int smu_v13_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; @@ -2108,18 +2109,14 @@ int smu_v13_0_set_vcn_enable(struct smu_context *smu, int inst) { struct amdgpu_device *adev = smu->adev; - int i, ret = 0; + int ret = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << inst)) + return ret; - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - i << 16U, NULL); - if (ret) - return ret; - } + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? 
+ SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + inst << 16U, NULL); return ret; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c index ab3c93ddce46..5b86df0c8536 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c @@ -193,6 +193,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU MSG_MAP(SelectPLPDMode, PPSMC_MSG_SelectPLPDMode, 0), MSG_MAP(RmaDueToBadPageThreshold, PPSMC_MSG_RmaDueToBadPageThreshold, 0), MSG_MAP(SelectPstatePolicy, PPSMC_MSG_SelectPstatePolicy, 0), + MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0), }; // clang-format on @@ -304,7 +305,8 @@ static int smu_v13_0_6_init_microcode(struct smu_context *smu) amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - ret = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + ret = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (ret) goto out; @@ -2716,6 +2718,27 @@ static int smu_v13_0_6_send_rma_reason(struct smu_context *smu) return ret; } +static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask) +{ + struct amdgpu_device *adev = smu->adev; + int ret = 0; + + /* the message is only valid on SMU 13.0.6 with pmfw 85.121.00 and above */ + if ((adev->flags & AMD_IS_APU) || + amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 6) || + smu->smc_fw_version < 0x00557900) + return 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_ResetSDMA, inst_mask, NULL); + if (ret) + dev_err(smu->adev->dev, + "failed to send ResetSDMA event with mask 0x%x\n", + inst_mask); + + return ret; +} + static int mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) { struct smu_context *smu = adev->powerplay.pp_handle; @@ -3385,6 +3408,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = { .i2c_fini = smu_v13_0_6_i2c_control_fini, .send_hbm_bad_pages_num = smu_v13_0_6_smu_send_hbm_bad_page_num, .send_rma_reason = smu_v13_0_6_send_rma_reason, + .reset_sdma = smu_v13_0_6_reset_sdma, }; void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c index f4ac403b8b36..aabb94796005 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c @@ -2810,4 +2810,5 @@ void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu) smu->workload_map = smu_v13_0_7_workload_map; smu->smc_driver_if_version = SMU13_0_7_DRIVER_IF_VERSION; smu_v13_0_set_smu_mailbox_registers(smu); + smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT; } diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c index a87040cb2f2e..9b2f4fe1578b 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c @@ -79,7 +79,8 @@ int smu_v14_0_init_microcode(struct smu_context *smu) return 0; amdgpu_ucode_ip_version_decode(adev, MP1_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->pm.fw, "amdgpu/%s.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->pm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); if (err) goto out; @@ -1511,29 +1512,24 @@ int smu_v14_0_set_vcn_enable(struct smu_context *smu, int inst) { struct amdgpu_device *adev 
= smu->adev; - int i, ret = 0; + int ret = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << inst)) + return ret; - if (smu->is_apu) { - if (i == 0) - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0, - i << 16U, NULL); - else if (i == 1) - ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1, - i << 16U, NULL); - } else { + if (smu->is_apu) { + if (inst == 0) ret = smu_cmn_send_smc_msg_with_param(smu, enable ? - SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, - i << 16U, NULL); - } - - if (ret) - return ret; + SMU_MSG_PowerUpVcn0 : SMU_MSG_PowerDownVcn0, + inst << 16U, NULL); + else if (inst == 1) + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn1 : SMU_MSG_PowerDownVcn1, + inst << 16U, NULL); + } else { + ret = smu_cmn_send_smc_msg_with_param(smu, enable ? + SMU_MSG_PowerUpVcn : SMU_MSG_PowerDownVcn, + inst << 16U, NULL); } return ret; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 6a565ce74d5b..5cad09c5f2ff 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -2096,7 +2096,7 @@ static int smu_v14_0_2_enable_gfx_features(struct smu_context *smu) { struct amdgpu_device *adev = smu->adev; - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(14, 0, 2)) + if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(14, 0, 2)) return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnableAllSmuFeatures, FEATURE_PWR_GFX, NULL); else diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c index d981d721e796..358c1512b087 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c @@ -9,7 +9,7 @@ #include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_module.h> #include <drm/drm_of.h> #include "komeda_dev.h" diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c index 1e7b1fcb2848..6ed504099188 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c @@ -63,7 +63,6 @@ static const struct drm_driver komeda_kms_driver = { .fops = &komeda_cma_fops, .name = "komeda", .desc = "Arm Komeda Display Processor driver", - .date = "20181101", .major = 0, .minor = 1, }; diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c index 191b806624df..c3179d74f3f5 100644 --- a/drivers/gpu/drm/arm/hdlcd_drv.c +++ b/drivers/gpu/drm/arm/hdlcd_drv.c @@ -22,8 +22,8 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_crtc.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> @@ -233,7 +233,6 @@ static const struct drm_driver hdlcd_driver = { .fops = &fops, .name = "hdlcd", .desc = "ARM HDLCD Controller DRM", - .date = "20151021", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c index fd2be80f3bf5..e083021e9e99 100644 --- a/drivers/gpu/drm/arm/malidp_drv.c +++ b/drivers/gpu/drm/arm/malidp_drv.c @@ -16,9 
+16,9 @@ #include <linux/pm_runtime.h> #include <linux/debugfs.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_crtc.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> @@ -570,7 +570,6 @@ static const struct drm_driver malidp_driver = { .fops = &fops, .name = "mali-dp", .desc = "ARM Mali Display Processor driver", - .date = "20160106", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c index 650e450cc19b..cae25ad66c74 100644 --- a/drivers/gpu/drm/armada/armada_drv.c +++ b/drivers/gpu/drm/armada/armada_drv.c @@ -11,8 +11,8 @@ #include <linux/of_graph.h> #include <linux/platform_device.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_ioctl.h> #include <drm/drm_managed.h> @@ -45,7 +45,6 @@ static const struct drm_driver armada_drm_driver = { .minor = 0, .name = "armada-drm", .desc = "Armada SoC DRM", - .date = "20120730", .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, .ioctls = armada_ioctls, .num_ioctls = ARRAY_SIZE(armada_ioctls), diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c index b7e608ba6194..397e677a691c 100644 --- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c +++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c @@ -13,8 +13,8 @@ #include <linux/regmap.h> #include <linux/reset.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_device.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -252,7 +252,6 @@ static const struct drm_driver aspeed_gfx_driver = { .fops = &fops, .name = "aspeed-gfx-drm", .desc = "ASPEED GFX DRM", - .date = "20180319", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index 4afe4be072ef..ff3bcdd1cff2 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -31,8 +31,8 @@ #include <linux/of.h> #include <linux/pci.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_gem_shmem_helper.h> @@ -60,7 +60,6 @@ static const struct drm_driver ast_driver = { .fops = &ast_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index 21ce3769bf0d..6b4305ac07d4 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -43,7 +43,6 @@ #define DRIVER_NAME "ast" #define DRIVER_DESC "AST" -#define DRIVER_DATE "20120228" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 1 diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c index 7b209af7cf45..fa8ad94e431a 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c @@ -16,9 +16,9 @@ #include <linux/pm_runtime.h> #include <linux/platform_device.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_fourcc.h> @@ -846,7 +846,6 @@ static 
const struct drm_driver atmel_hlcdc_dc_driver = { .fops = &fops, .name = "atmel-hlcdc", .desc = "Atmel HLCD Controller DRM", - .date = "20141504", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c index b754947e3e00..83d711ee3a2e 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c +++ b/drivers/gpu/drm/bridge/analogix/analogix-anx6345.c @@ -793,7 +793,7 @@ static void anx6345_i2c_remove(struct i2c_client *client) } static const struct i2c_device_id anx6345_id[] = { - { "anx6345", 0 }, + { "anx6345" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(i2c, anx6345_id); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index a2675b121fe4..6238eabd2328 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -2002,8 +2002,10 @@ static int anx7625_audio_get_eld(struct device *dev, void *data, memset(buf, 0, len); } else { dev_dbg(dev, "audio copy eld\n"); + mutex_lock(&ctx->connector->eld_mutex); memcpy(buf, ctx->connector->eld, min(sizeof(ctx->connector->eld), len)); + mutex_unlock(&ctx->connector->eld_mutex); } return 0; @@ -2795,7 +2797,7 @@ static void anx7625_i2c_remove(struct i2c_client *client) } static const struct i2c_device_id anx7625_id[] = { - {"anx7625", 0}, + { "anx7625" }, {} }; diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c index 31832ba4017f..42248f179b69 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.c @@ -500,34 +500,6 @@ static void cdns_mhdp_hdcp_prop_work(struct work_struct *work) drm_modeset_unlock(&dev->mode_config.connection_mutex); } -int cdns_mhdp_hdcp_set_lc(struct cdns_mhdp_device *mhdp, u8 *val) -{ - int ret; - - mutex_lock(&mhdp->mbox_mutex); - ret = cdns_mhdp_secure_mailbox_send(mhdp, MB_MODULE_ID_HDCP_GENERAL, - HDCP_GENERAL_SET_LC_128, - 16, val); - mutex_unlock(&mhdp->mbox_mutex); - - return ret; -} - -int -cdns_mhdp_hdcp_set_public_key_param(struct cdns_mhdp_device *mhdp, - struct cdns_hdcp_tx_public_key_param *val) -{ - int ret; - - mutex_lock(&mhdp->mbox_mutex); - ret = cdns_mhdp_secure_mailbox_send(mhdp, MB_MODULE_ID_HDCP_TX, - HDCP2X_TX_SET_PUBLIC_KEY_PARAMS, - sizeof(*val), (u8 *)val); - mutex_unlock(&mhdp->mbox_mutex); - - return ret; -} - int cdns_mhdp_hdcp_enable(struct cdns_mhdp_device *mhdp, u8 content_type) { int ret; diff --git a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h index 334c0b8b0d4f..3b6ec9c3a8d8 100644 --- a/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h +++ b/drivers/gpu/drm/bridge/cadence/cdns-mhdp8546-hdcp.h @@ -82,9 +82,6 @@ struct cdns_hdcp_tx_public_key_param { u8 E[DLP_E]; }; -int cdns_mhdp_hdcp_set_public_key_param(struct cdns_mhdp_device *mhdp, - struct cdns_hdcp_tx_public_key_param *val); -int cdns_mhdp_hdcp_set_lc(struct cdns_mhdp_device *mhdp, u8 *val); int cdns_mhdp_hdcp_enable(struct cdns_mhdp_device *mhdp, u8 content_type); int cdns_mhdp_hdcp_disable(struct cdns_mhdp_device *mhdp); void cdns_mhdp_hdcp_init(struct cdns_mhdp_device *mhdp); diff --git a/drivers/gpu/drm/bridge/chipone-icn6211.c b/drivers/gpu/drm/bridge/chipone-icn6211.c index 9eecac457dcf..d47703559b0d 100644 --- a/drivers/gpu/drm/bridge/chipone-icn6211.c +++ b/drivers/gpu/drm/bridge/chipone-icn6211.c @@ -785,7 +785,7 @@ static struct mipi_dsi_driver 
chipone_dsi_driver = { }, }; -static struct i2c_device_id chipone_i2c_id[] = { +static const struct i2c_device_id chipone_i2c_id[] = { { "chipone,icn6211" }, {}, }; diff --git a/drivers/gpu/drm/bridge/chrontel-ch7033.c b/drivers/gpu/drm/bridge/chrontel-ch7033.c index c83486cf6b15..da17f0978a79 100644 --- a/drivers/gpu/drm/bridge/chrontel-ch7033.c +++ b/drivers/gpu/drm/bridge/chrontel-ch7033.c @@ -597,7 +597,7 @@ static const struct of_device_id ch7033_dt_ids[] = { MODULE_DEVICE_TABLE(of, ch7033_dt_ids); static const struct i2c_device_id ch7033_ids[] = { - { "ch7033", 0 }, + { "ch7033" }, { } }; MODULE_DEVICE_TABLE(i2c, ch7033_ids); diff --git a/drivers/gpu/drm/bridge/ite-it6263.c b/drivers/gpu/drm/bridge/ite-it6263.c index cbabd4e20d3e..44af1f25034a 100644 --- a/drivers/gpu/drm/bridge/ite-it6263.c +++ b/drivers/gpu/drm/bridge/ite-it6263.c @@ -48,6 +48,7 @@ #define REG_COL_DEP GENMASK(1, 0) #define BIT8 FIELD_PREP(REG_COL_DEP, 1) #define OUT_MAP BIT(4) +#define VESA BIT(4) #define JEIDA 0 #define REG_DESSC_ENB BIT(6) #define DMODE BIT(7) @@ -428,12 +429,30 @@ static inline void it6263_lvds_reset(struct it6263 *it) fsleep(10000); } +static inline bool it6263_is_input_bus_fmt_valid(int input_fmt) +{ + switch (input_fmt) { + case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA: + case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG: + return true; + } + return false; +} + static inline void it6263_lvds_set_interface(struct it6263 *it) { + u8 fmt; + /* color depth */ regmap_write_bits(it->lvds_regmap, LVDS_REG_2C, REG_COL_DEP, BIT8); + + if (it->lvds_data_mapping == MEDIA_BUS_FMT_RGB888_1X7X4_SPWG) + fmt = VESA; + else + fmt = JEIDA; + /* output mapping */ - regmap_write_bits(it->lvds_regmap, LVDS_REG_2C, OUT_MAP, JEIDA); + regmap_write_bits(it->lvds_regmap, LVDS_REG_2C, OUT_MAP, fmt); if (it->lvds_dual_link) { regmap_write_bits(it->lvds_regmap, LVDS_REG_2C, DMODE, DISO); @@ -714,14 +733,14 @@ it6263_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, *num_input_fmts = 0; - if (it->lvds_data_mapping != MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA) + if (!it6263_is_input_bus_fmt_valid(it->lvds_data_mapping)) return NULL; input_fmts = kmalloc(sizeof(*input_fmts), GFP_KERNEL); if (!input_fmts) return NULL; - input_fmts[0] = MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA; + input_fmts[0] = it->lvds_data_mapping; *num_input_fmts = 1; return input_fmts; @@ -878,7 +897,7 @@ static const struct of_device_id it6263_of_match[] = { MODULE_DEVICE_TABLE(of, it6263_of_match); static const struct i2c_device_id it6263_i2c_ids[] = { - { "it6263", 0 }, + { "it6263" }, { } }; MODULE_DEVICE_TABLE(i2c, it6263_i2c_ids); diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c index 008d86cc562a..0faad10ba8e4 100644 --- a/drivers/gpu/drm/bridge/ite-it6505.c +++ b/drivers/gpu/drm/bridge/ite-it6505.c @@ -3497,7 +3497,7 @@ static void it6505_i2c_remove(struct i2c_client *client) } static const struct i2c_device_id it6505_id[] = { - { "it6505", 0 }, + { "it6505" }, { } }; diff --git a/drivers/gpu/drm/bridge/ite-it66121.c b/drivers/gpu/drm/bridge/ite-it66121.c index 35ae3f0e8f51..940083e5d2dd 100644 --- a/drivers/gpu/drm/bridge/ite-it66121.c +++ b/drivers/gpu/drm/bridge/ite-it66121.c @@ -1450,8 +1450,10 @@ static int it66121_audio_get_eld(struct device *dev, void *data, dev_dbg(dev, "No connector present, passing empty EDID data"); memset(buf, 0, len); } else { + mutex_lock(&ctx->connector->eld_mutex); memcpy(buf, ctx->connector->eld, min(sizeof(ctx->connector->eld), len)); + mutex_unlock(&ctx->connector->eld_mutex); } 
mutex_unlock(&ctx->lock); diff --git a/drivers/gpu/drm/bridge/lontium-lt8912b.c b/drivers/gpu/drm/bridge/lontium-lt8912b.c index e265ab3c8c92..52da204f5740 100644 --- a/drivers/gpu/drm/bridge/lontium-lt8912b.c +++ b/drivers/gpu/drm/bridge/lontium-lt8912b.c @@ -815,8 +815,8 @@ static const struct of_device_id lt8912_dt_match[] = { MODULE_DEVICE_TABLE(of, lt8912_dt_match); static const struct i2c_device_id lt8912_id[] = { - {"lt8912", 0}, - {}, + { "lt8912" }, + {} }; MODULE_DEVICE_TABLE(i2c, lt8912_id); diff --git a/drivers/gpu/drm/bridge/lontium-lt9211.c b/drivers/gpu/drm/bridge/lontium-lt9211.c index c8881796fba4..999ddebb832d 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9211.c +++ b/drivers/gpu/drm/bridge/lontium-lt9211.c @@ -773,7 +773,7 @@ static void lt9211_remove(struct i2c_client *client) drm_bridge_remove(&ctx->bridge); } -static struct i2c_device_id lt9211_id[] = { +static const struct i2c_device_id lt9211_id[] = { { "lontium,lt9211" }, {}, }; diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index 1b31fdebe164..74f726efc746 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -757,7 +757,6 @@ static enum drm_mode_status lt9611_bridge_mode_valid(struct drm_bridge *bridge, const struct drm_display_mode *mode) { struct lt9611 *lt9611 = bridge_to_lt9611(bridge); - unsigned long long rate; if (mode->hdisplay > 3840) return MODE_BAD_HVALUE; @@ -765,8 +764,7 @@ static enum drm_mode_status lt9611_bridge_mode_valid(struct drm_bridge *bridge, if (mode->hdisplay > 2000 && !lt9611->dsi1_node) return MODE_PANEL; - rate = drm_hdmi_compute_mode_clock(mode, 8, HDMI_COLORSPACE_RGB); - return bridge->funcs->hdmi_tmds_char_rate_valid(bridge, mode, rate); + return MODE_OK; } static int lt9611_bridge_atomic_check(struct drm_bridge *bridge, @@ -1235,8 +1233,8 @@ static void lt9611_remove(struct i2c_client *client) of_node_put(lt9611->dsi0_node); } -static struct i2c_device_id lt9611_id[] = { - { "lontium,lt9611", 0 }, +static const struct i2c_device_id lt9611_id[] = { + { "lontium,lt9611" }, {} }; MODULE_DEVICE_TABLE(i2c, lt9611_id); diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index 4d1d40e1f1b4..db9a5466060b 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -913,8 +913,8 @@ static void lt9611uxc_remove(struct i2c_client *client) of_node_put(lt9611uxc->dsi0_node); } -static struct i2c_device_id lt9611uxc_id[] = { - { "lontium,lt9611uxc", 0 }, +static const struct i2c_device_id lt9611uxc_id[] = { + { "lontium,lt9611uxc" }, { /* sentinel */ } }; diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index 37f1acf5c0f8..a3dcee62e7a5 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -318,8 +318,8 @@ static void stdp4028_ge_b850v3_fw_remove(struct i2c_client *stdp4028_i2c) } static const struct i2c_device_id stdp4028_ge_b850v3_fw_i2c_table[] = { - {"stdp4028_ge_fw", 0}, - {}, + { "stdp4028_ge_fw" }, + {} }; MODULE_DEVICE_TABLE(i2c, stdp4028_ge_b850v3_fw_i2c_table); @@ -365,8 +365,8 @@ static void stdp2690_ge_b850v3_fw_remove(struct i2c_client *stdp2690_i2c) } static const struct i2c_device_id stdp2690_ge_b850v3_fw_i2c_table[] = { - {"stdp2690_ge_fw", 0}, - {}, + { "stdp2690_ge_fw" }, + {} }; MODULE_DEVICE_TABLE(i2c, 
stdp2690_ge_b850v3_fw_i2c_table); diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c index e77aab965fcf..44e36ae66db4 100644 --- a/drivers/gpu/drm/bridge/nxp-ptn3460.c +++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c @@ -319,8 +319,8 @@ static void ptn3460_remove(struct i2c_client *client) } static const struct i2c_device_id ptn3460_i2c_table[] = { - {"ptn3460", 0}, - {}, + { "ptn3460" }, + {} }; MODULE_DEVICE_TABLE(i2c, ptn3460_i2c_table); diff --git a/drivers/gpu/drm/bridge/sii902x.c b/drivers/gpu/drm/bridge/sii902x.c index 9be9cc5b9025..127da22011b3 100644 --- a/drivers/gpu/drm/bridge/sii902x.c +++ b/drivers/gpu/drm/bridge/sii902x.c @@ -1239,8 +1239,8 @@ static const struct of_device_id sii902x_dt_ids[] = { MODULE_DEVICE_TABLE(of, sii902x_dt_ids); static const struct i2c_device_id sii902x_i2c_ids[] = { - { "sii9022", 0 }, - { }, + { "sii9022" }, + { } }; MODULE_DEVICE_TABLE(i2c, sii902x_i2c_ids); diff --git a/drivers/gpu/drm/bridge/sii9234.c b/drivers/gpu/drm/bridge/sii9234.c index 0c74cdc07032..cd7837c9a6e0 100644 --- a/drivers/gpu/drm/bridge/sii9234.c +++ b/drivers/gpu/drm/bridge/sii9234.c @@ -945,8 +945,8 @@ static const struct of_device_id sii9234_dt_match[] = { MODULE_DEVICE_TABLE(of, sii9234_dt_match); static const struct i2c_device_id sii9234_id[] = { - { "SII9234", 0 }, - { }, + { "SII9234" }, + { } }; MODULE_DEVICE_TABLE(i2c, sii9234_id); diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 26b8d137bce0..28a2e1ee04b2 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -2368,8 +2368,8 @@ static const struct of_device_id sii8620_dt_match[] = { MODULE_DEVICE_TABLE(of, sii8620_dt_match); static const struct i2c_device_id sii8620_id[] = { - { "sii8620", 0 }, - { }, + { "sii8620" }, + { } }; MODULE_DEVICE_TABLE(i2c, sii8620_id); diff --git a/drivers/gpu/drm/bridge/synopsys/Kconfig b/drivers/gpu/drm/bridge/synopsys/Kconfig index ca416dab156d..f3ab2f985f8c 100644 --- a/drivers/gpu/drm/bridge/synopsys/Kconfig +++ b/drivers/gpu/drm/bridge/synopsys/Kconfig @@ -59,3 +59,9 @@ config DRM_DW_MIPI_DSI select DRM_KMS_HELPER select DRM_MIPI_DSI select DRM_PANEL_BRIDGE + +config DRM_DW_MIPI_DSI2 + tristate + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select DRM_PANEL_BRIDGE diff --git a/drivers/gpu/drm/bridge/synopsys/Makefile b/drivers/gpu/drm/bridge/synopsys/Makefile index 9869d9651ed1..9dc376d220ad 100644 --- a/drivers/gpu/drm/bridge/synopsys/Makefile +++ b/drivers/gpu/drm/bridge/synopsys/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_DRM_DW_HDMI_CEC) += dw-hdmi-cec.o obj-$(CONFIG_DRM_DW_HDMI_QP) += dw-hdmi-qp.o obj-$(CONFIG_DRM_DW_MIPI_DSI) += dw-mipi-dsi.o +obj-$(CONFIG_DRM_DW_MIPI_DSI2) += dw-mipi-dsi2.o diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c index 181c5164b231..c686671e4850 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.c @@ -442,16 +442,14 @@ dw_hdmi_qp_bridge_edid_read(struct drm_bridge *bridge, } static enum drm_mode_status -dw_hdmi_qp_bridge_mode_valid(struct drm_bridge *bridge, - const struct drm_display_info *info, - const struct drm_display_mode *mode) +dw_hdmi_qp_bridge_tmds_char_rate_valid(const struct drm_bridge *bridge, + const struct drm_display_mode *mode, + unsigned long long rate) { struct dw_hdmi_qp *hdmi = bridge->driver_private; - unsigned long long rate; - rate = drm_hdmi_compute_mode_clock(mode, 8, HDMI_COLORSPACE_RGB); if (rate > 
HDMI14_MAX_TMDSCLK) { - dev_dbg(hdmi->dev, "Unsupported mode clock: %d\n", mode->clock); + dev_dbg(hdmi->dev, "Unsupported TMDS char rate: %lld\n", rate); return MODE_CLOCK_HIGH; } @@ -510,7 +508,7 @@ static const struct drm_bridge_funcs dw_hdmi_qp_bridge_funcs = { .atomic_disable = dw_hdmi_qp_bridge_atomic_disable, .detect = dw_hdmi_qp_bridge_detect, .edid_read = dw_hdmi_qp_bridge_edid_read, - .mode_valid = dw_hdmi_qp_bridge_mode_valid, + .hdmi_tmds_char_rate_valid = dw_hdmi_qp_bridge_tmds_char_rate_valid, .hdmi_clear_infoframe = dw_hdmi_qp_bridge_clear_infoframe, .hdmi_write_infoframe = dw_hdmi_qp_bridge_write_infoframe, }; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h index 2115b8ef0bd6..72987e6c4689 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-qp.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: * Algea Cao <algea.cao@rock-chips.com> */ diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c new file mode 100644 index 000000000000..d7569bf2d9c3 --- /dev/null +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi2.c @@ -0,0 +1,1030 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2024, Fuzhou Rockchip Electronics Co., Ltd + * + * Modified by Heiko Stuebner <heiko.stuebner@cherry.de> + * This generic Synopsys DesignWare MIPI DSI2 host driver is based on the + * Rockchip version from rockchip/dw-mipi-dsi2.c converted to use bridge APIs. + */ + +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/iopoll.h> +#include <linux/media-bus-format.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/reset.h> + +#include <video/mipi_display.h> + +#include <drm/bridge/dw_mipi_dsi2.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_of.h> +#include <drm/drm_print.h> + +#define DSI2_PWR_UP 0x000c +#define RESET 0 +#define POWER_UP BIT(0) +#define CMD_TX_MODE(x) FIELD_PREP(BIT(24), x) +#define DSI2_SOFT_RESET 0x0010 +#define SYS_RSTN BIT(2) +#define PHY_RSTN BIT(1) +#define IPI_RSTN BIT(0) +#define INT_ST_MAIN 0x0014 +#define DSI2_MODE_CTRL 0x0018 +#define DSI2_MODE_STATUS 0x001c +#define DSI2_CORE_STATUS 0x0020 +#define PRI_RD_DATA_AVAIL BIT(26) +#define PRI_FIFOS_NOT_EMPTY BIT(25) +#define PRI_BUSY BIT(24) +#define CRI_RD_DATA_AVAIL BIT(18) +#define CRT_FIFOS_NOT_EMPTY BIT(17) +#define CRI_BUSY BIT(16) +#define IPI_FIFOS_NOT_EMPTY BIT(9) +#define IPI_BUSY BIT(8) +#define CORE_FIFOS_NOT_EMPTY BIT(1) +#define CORE_BUSY BIT(0) +#define MANUAL_MODE_CFG 0x0024 +#define MANUAL_MODE_EN BIT(0) +#define DSI2_TIMEOUT_HSTX_CFG 0x0048 +#define TO_HSTX(x) FIELD_PREP(GENMASK(15, 0), x) +#define DSI2_TIMEOUT_HSTXRDY_CFG 0x004c +#define TO_HSTXRDY(x) FIELD_PREP(GENMASK(15, 0), x) +#define DSI2_TIMEOUT_LPRX_CFG 0x0050 +#define TO_LPRXRDY(x) FIELD_PREP(GENMASK(15, 0), x) +#define DSI2_TIMEOUT_LPTXRDY_CFG 0x0054 +#define TO_LPTXRDY(x) FIELD_PREP(GENMASK(15, 0), x) +#define DSI2_TIMEOUT_LPTXTRIG_CFG 0x0058 +#define TO_LPTXTRIG(x) FIELD_PREP(GENMASK(15, 0), x) +#define DSI2_TIMEOUT_LPTXULPS_CFG 0x005c +#define TO_LPTXULPS(x) FIELD_PREP(GENMASK(15, 0), x) +#define DSI2_TIMEOUT_BTA_CFG 0x60 +#define TO_BTA(x) FIELD_PREP(GENMASK(15, 0), x) + +#define DSI2_PHY_MODE_CFG 0x0100 +#define 
PPI_WIDTH(x) FIELD_PREP(GENMASK(9, 8), x) +#define PHY_LANES(x) FIELD_PREP(GENMASK(5, 4), (x) - 1) +#define PHY_TYPE(x) FIELD_PREP(BIT(0), x) +#define DSI2_PHY_CLK_CFG 0X0104 +#define PHY_LPTX_CLK_DIV(x) FIELD_PREP(GENMASK(12, 8), x) +#define CLK_TYPE_MASK BIT(0) +#define NON_CONTINUOUS_CLK BIT(0) +#define CONTINUOUS_CLK 0 +#define DSI2_PHY_LP2HS_MAN_CFG 0x010c +#define PHY_LP2HS_TIME(x) FIELD_PREP(GENMASK(28, 0), x) +#define DSI2_PHY_HS2LP_MAN_CFG 0x0114 +#define PHY_HS2LP_TIME(x) FIELD_PREP(GENMASK(28, 0), x) +#define DSI2_PHY_MAX_RD_T_MAN_CFG 0x011c +#define PHY_MAX_RD_TIME(x) FIELD_PREP(GENMASK(26, 0), x) +#define DSI2_PHY_ESC_CMD_T_MAN_CFG 0x0124 +#define PHY_ESC_CMD_TIME(x) FIELD_PREP(GENMASK(28, 0), x) +#define DSI2_PHY_ESC_BYTE_T_MAN_CFG 0x012c +#define PHY_ESC_BYTE_TIME(x) FIELD_PREP(GENMASK(28, 0), x) + +#define DSI2_PHY_IPI_RATIO_MAN_CFG 0x0134 +#define PHY_IPI_RATIO(x) FIELD_PREP(GENMASK(21, 0), x) +#define DSI2_PHY_SYS_RATIO_MAN_CFG 0x013C +#define PHY_SYS_RATIO(x) FIELD_PREP(GENMASK(16, 0), x) + +#define DSI2_DSI_GENERAL_CFG 0x0200 +#define BTA_EN BIT(1) +#define EOTP_TX_EN BIT(0) +#define DSI2_DSI_VCID_CFG 0x0204 +#define TX_VCID(x) FIELD_PREP(GENMASK(1, 0), x) +#define DSI2_DSI_SCRAMBLING_CFG 0x0208 +#define SCRAMBLING_SEED(x) FIELD_PREP(GENMASK(31, 16), x) +#define SCRAMBLING_EN BIT(0) +#define DSI2_DSI_VID_TX_CFG 0x020c +#define LPDT_DISPLAY_CMD_EN BIT(20) +#define BLK_VFP_HS_EN BIT(14) +#define BLK_VBP_HS_EN BIT(13) +#define BLK_VSA_HS_EN BIT(12) +#define BLK_HFP_HS_EN BIT(6) +#define BLK_HBP_HS_EN BIT(5) +#define BLK_HSA_HS_EN BIT(4) +#define VID_MODE_TYPE(x) FIELD_PREP(GENMASK(1, 0), x) +#define DSI2_CRI_TX_HDR 0x02c0 +#define CMD_TX_MODE(x) FIELD_PREP(BIT(24), x) +#define DSI2_CRI_TX_PLD 0x02c4 +#define DSI2_CRI_RX_HDR 0x02c8 +#define DSI2_CRI_RX_PLD 0x02cc + +#define DSI2_IPI_COLOR_MAN_CFG 0x0300 +#define IPI_DEPTH(x) FIELD_PREP(GENMASK(7, 4), x) +#define IPI_DEPTH_5_6_5_BITS 0x02 +#define IPI_DEPTH_6_BITS 0x03 +#define IPI_DEPTH_8_BITS 0x05 +#define IPI_DEPTH_10_BITS 0x06 +#define IPI_FORMAT(x) FIELD_PREP(GENMASK(3, 0), x) +#define IPI_FORMAT_RGB 0x0 +#define IPI_FORMAT_DSC 0x0b +#define DSI2_IPI_VID_HSA_MAN_CFG 0x0304 +#define VID_HSA_TIME(x) FIELD_PREP(GENMASK(29, 0), x) +#define DSI2_IPI_VID_HBP_MAN_CFG 0x030c +#define VID_HBP_TIME(x) FIELD_PREP(GENMASK(29, 0), x) +#define DSI2_IPI_VID_HACT_MAN_CFG 0x0314 +#define VID_HACT_TIME(x) FIELD_PREP(GENMASK(29, 0), x) +#define DSI2_IPI_VID_HLINE_MAN_CFG 0x031c +#define VID_HLINE_TIME(x) FIELD_PREP(GENMASK(29, 0), x) +#define DSI2_IPI_VID_VSA_MAN_CFG 0x0324 +#define VID_VSA_LINES(x) FIELD_PREP(GENMASK(9, 0), x) +#define DSI2_IPI_VID_VBP_MAN_CFG 0X032C +#define VID_VBP_LINES(x) FIELD_PREP(GENMASK(9, 0), x) +#define DSI2_IPI_VID_VACT_MAN_CFG 0X0334 +#define VID_VACT_LINES(x) FIELD_PREP(GENMASK(13, 0), x) +#define DSI2_IPI_VID_VFP_MAN_CFG 0X033C +#define VID_VFP_LINES(x) FIELD_PREP(GENMASK(9, 0), x) +#define DSI2_IPI_PIX_PKT_CFG 0x0344 +#define MAX_PIX_PKT(x) FIELD_PREP(GENMASK(15, 0), x) + +#define DSI2_INT_ST_PHY 0x0400 +#define DSI2_INT_MASK_PHY 0x0404 +#define DSI2_INT_ST_TO 0x0410 +#define DSI2_INT_MASK_TO 0x0414 +#define DSI2_INT_ST_ACK 0x0420 +#define DSI2_INT_MASK_ACK 0x0424 +#define DSI2_INT_ST_IPI 0x0430 +#define DSI2_INT_MASK_IPI 0x0434 +#define DSI2_INT_ST_FIFO 0x0440 +#define DSI2_INT_MASK_FIFO 0x0444 +#define DSI2_INT_ST_PRI 0x0450 +#define DSI2_INT_MASK_PRI 0x0454 +#define DSI2_INT_ST_CRI 0x0460 +#define DSI2_INT_MASK_CRI 0x0464 +#define DSI2_INT_FORCE_CRI 0x0468 +#define DSI2_MAX_REGISGER DSI2_INT_FORCE_CRI + 
+#define MODE_STATUS_TIMEOUT_US 10000 +#define CMD_PKT_STATUS_TIMEOUT_US 20000 + +enum vid_mode_type { + VID_MODE_TYPE_NON_BURST_SYNC_PULSES, + VID_MODE_TYPE_NON_BURST_SYNC_EVENTS, + VID_MODE_TYPE_BURST, +}; + +enum mode_ctrl { + IDLE_MODE, + AUTOCALC_MODE, + COMMAND_MODE, + VIDEO_MODE, + DATA_STREAM_MODE, + VIDEO_TEST_MODE, + DATA_STREAM_TEST_MODE, +}; + +enum ppi_width { + PPI_WIDTH_8_BITS, + PPI_WIDTH_16_BITS, + PPI_WIDTH_32_BITS, +}; + +struct cmd_header { + u8 cmd_type; + u8 delay; + u8 payload_length; +}; + +struct dw_mipi_dsi2 { + struct drm_bridge bridge; + struct mipi_dsi_host dsi_host; + struct drm_bridge *panel_bridge; + struct device *dev; + struct regmap *regmap; + struct clk *pclk; + struct clk *sys_clk; + + unsigned int lane_mbps; /* per lane */ + u32 channel; + u32 lanes; + u32 format; + unsigned long mode_flags; + + struct drm_display_mode mode; + const struct dw_mipi_dsi2_plat_data *plat_data; +}; + +static inline struct dw_mipi_dsi2 *host_to_dsi2(struct mipi_dsi_host *host) +{ + return container_of(host, struct dw_mipi_dsi2, dsi_host); +} + +static inline struct dw_mipi_dsi2 *bridge_to_dsi2(struct drm_bridge *bridge) +{ + return container_of(bridge, struct dw_mipi_dsi2, bridge); +} + +static int cri_fifos_wait_avail(struct dw_mipi_dsi2 *dsi2) +{ + u32 sts, mask; + int ret; + + mask = CRI_BUSY | CRT_FIFOS_NOT_EMPTY; + ret = regmap_read_poll_timeout(dsi2->regmap, DSI2_CORE_STATUS, sts, + !(sts & mask), 0, CMD_PKT_STATUS_TIMEOUT_US); + if (ret < 0) { + dev_err(dsi2->dev, "command interface is busy\n"); + return ret; + } + + return 0; +} + +static void dw_mipi_dsi2_set_vid_mode(struct dw_mipi_dsi2 *dsi2) +{ + u32 val = 0, mode; + int ret; + + if (dsi2->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HFP) + val |= BLK_HFP_HS_EN; + + if (dsi2->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HBP) + val |= BLK_HBP_HS_EN; + + if (dsi2->mode_flags & MIPI_DSI_MODE_VIDEO_NO_HSA) + val |= BLK_HSA_HS_EN; + + if (dsi2->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) + val |= VID_MODE_TYPE_BURST; + else if (dsi2->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + val |= VID_MODE_TYPE_NON_BURST_SYNC_PULSES; + else + val |= VID_MODE_TYPE_NON_BURST_SYNC_EVENTS; + + regmap_write(dsi2->regmap, DSI2_DSI_VID_TX_CFG, val); + + regmap_write(dsi2->regmap, DSI2_MODE_CTRL, VIDEO_MODE); + ret = regmap_read_poll_timeout(dsi2->regmap, DSI2_MODE_STATUS, + mode, mode & VIDEO_MODE, + 1000, MODE_STATUS_TIMEOUT_US); + if (ret < 0) + dev_err(dsi2->dev, "failed to enter video mode\n"); +} + +static void dw_mipi_dsi2_set_data_stream_mode(struct dw_mipi_dsi2 *dsi2) +{ + u32 mode; + int ret; + + regmap_write(dsi2->regmap, DSI2_MODE_CTRL, DATA_STREAM_MODE); + ret = regmap_read_poll_timeout(dsi2->regmap, DSI2_MODE_STATUS, + mode, mode & DATA_STREAM_MODE, + 1000, MODE_STATUS_TIMEOUT_US); + if (ret < 0) + dev_err(dsi2->dev, "failed to enter data stream mode\n"); +} + +static void dw_mipi_dsi2_set_cmd_mode(struct dw_mipi_dsi2 *dsi2) +{ + u32 mode; + int ret; + + regmap_write(dsi2->regmap, DSI2_MODE_CTRL, COMMAND_MODE); + ret = regmap_read_poll_timeout(dsi2->regmap, DSI2_MODE_STATUS, + mode, mode & COMMAND_MODE, + 1000, MODE_STATUS_TIMEOUT_US); + if (ret < 0) + dev_err(dsi2->dev, "failed to enter data stream mode\n"); +} + +static void dw_mipi_dsi2_host_softrst(struct dw_mipi_dsi2 *dsi2) +{ + regmap_write(dsi2->regmap, DSI2_SOFT_RESET, 0x0); + usleep_range(50, 100); + regmap_write(dsi2->regmap, DSI2_SOFT_RESET, + SYS_RSTN | PHY_RSTN | IPI_RSTN); +} + +static void dw_mipi_dsi2_phy_clk_mode_cfg(struct dw_mipi_dsi2 *dsi2) +{ + u32 sys_clk, esc_clk_div; + 
u32 val = 0; + + /* + * clk_type should be NON_CONTINUOUS_CLK before + * initial deskew calibration be sent. + */ + val |= NON_CONTINUOUS_CLK; + + /* The maximum value of the escape clock frequency is 20MHz */ + sys_clk = clk_get_rate(dsi2->sys_clk) / USEC_PER_SEC; + esc_clk_div = DIV_ROUND_UP(sys_clk, 20 * 2); + val |= PHY_LPTX_CLK_DIV(esc_clk_div); + + regmap_write(dsi2->regmap, DSI2_PHY_CLK_CFG, val); +} + +static void dw_mipi_dsi2_phy_ratio_cfg(struct dw_mipi_dsi2 *dsi2) +{ + struct drm_display_mode *mode = &dsi2->mode; + u64 sys_clk = clk_get_rate(dsi2->sys_clk); + u64 pixel_clk, ipi_clk, phy_hsclk; + u64 tmp; + + /* + * in DPHY mode, the phy_hstx_clk is exactly 1/16 the Lane high-speed + * data rate; In CPHY mode, the phy_hstx_clk is exactly 1/7 the trio + * high speed symbol rate. + */ + phy_hsclk = DIV_ROUND_CLOSEST_ULL(dsi2->lane_mbps * USEC_PER_SEC, 16); + + /* IPI_RATIO_MAN_CFG = PHY_HSTX_CLK / IPI_CLK */ + pixel_clk = mode->crtc_clock * MSEC_PER_SEC; + ipi_clk = pixel_clk / 4; + + tmp = DIV_ROUND_CLOSEST_ULL(phy_hsclk << 16, ipi_clk); + regmap_write(dsi2->regmap, DSI2_PHY_IPI_RATIO_MAN_CFG, + PHY_IPI_RATIO(tmp)); + + /* + * SYS_RATIO_MAN_CFG = MIPI_DCPHY_HSCLK_Freq / MIPI_DCPHY_HSCLK_Freq + */ + tmp = DIV_ROUND_CLOSEST_ULL(phy_hsclk << 16, sys_clk); + regmap_write(dsi2->regmap, DSI2_PHY_SYS_RATIO_MAN_CFG, + PHY_SYS_RATIO(tmp)); +} + +static void dw_mipi_dsi2_lp2hs_or_hs2lp_cfg(struct dw_mipi_dsi2 *dsi2) +{ + const struct dw_mipi_dsi2_phy_ops *phy_ops = dsi2->plat_data->phy_ops; + struct dw_mipi_dsi2_phy_timing timing; + int ret; + + ret = phy_ops->get_timing(dsi2->plat_data->priv_data, + dsi2->lane_mbps, &timing); + if (ret) + dev_err(dsi2->dev, "Retrieving phy timings failed\n"); + + regmap_write(dsi2->regmap, DSI2_PHY_LP2HS_MAN_CFG, PHY_LP2HS_TIME(timing.data_lp2hs)); + regmap_write(dsi2->regmap, DSI2_PHY_HS2LP_MAN_CFG, PHY_HS2LP_TIME(timing.data_hs2lp)); +} + +static void dw_mipi_dsi2_phy_init(struct dw_mipi_dsi2 *dsi2) +{ + const struct dw_mipi_dsi2_phy_ops *phy_ops = dsi2->plat_data->phy_ops; + struct dw_mipi_dsi2_phy_iface iface; + u32 val = 0; + + phy_ops->get_interface(dsi2->plat_data->priv_data, &iface); + + switch (iface.ppi_width) { + case 8: + val |= PPI_WIDTH(PPI_WIDTH_8_BITS); + break; + case 16: + val |= PPI_WIDTH(PPI_WIDTH_16_BITS); + break; + case 32: + val |= PPI_WIDTH(PPI_WIDTH_32_BITS); + break; + default: + /* Caught in probe */ + break; + } + + val |= PHY_LANES(dsi2->lanes); + val |= PHY_TYPE(DW_MIPI_DSI2_DPHY); + regmap_write(dsi2->regmap, DSI2_PHY_MODE_CFG, val); + + dw_mipi_dsi2_phy_clk_mode_cfg(dsi2); + dw_mipi_dsi2_phy_ratio_cfg(dsi2); + dw_mipi_dsi2_lp2hs_or_hs2lp_cfg(dsi2); + + /* phy configuration 8 - 10 */ +} + +static void dw_mipi_dsi2_tx_option_set(struct dw_mipi_dsi2 *dsi2) +{ + u32 val; + + val = BTA_EN | EOTP_TX_EN; + + if (dsi2->mode_flags & MIPI_DSI_MODE_NO_EOT_PACKET) + val &= ~EOTP_TX_EN; + + regmap_write(dsi2->regmap, DSI2_DSI_GENERAL_CFG, val); + regmap_write(dsi2->regmap, DSI2_DSI_VCID_CFG, TX_VCID(dsi2->channel)); +} + +static void dw_mipi_dsi2_ipi_color_coding_cfg(struct dw_mipi_dsi2 *dsi2) +{ + u32 val, color_depth; + + switch (dsi2->format) { + case MIPI_DSI_FMT_RGB666: + case MIPI_DSI_FMT_RGB666_PACKED: + color_depth = IPI_DEPTH_6_BITS; + break; + case MIPI_DSI_FMT_RGB565: + color_depth = IPI_DEPTH_5_6_5_BITS; + break; + case MIPI_DSI_FMT_RGB888: + default: + color_depth = IPI_DEPTH_8_BITS; + break; + } + + val = IPI_DEPTH(color_depth) | + IPI_FORMAT(IPI_FORMAT_RGB); + regmap_write(dsi2->regmap, DSI2_IPI_COLOR_MAN_CFG, val); +} + 
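For illustration only (not part of the patch): the Q16 fixed-point ratios programmed by dw_mipi_dsi2_phy_ratio_cfg() and the escape-clock divider computed in dw_mipi_dsi2_phy_clk_mode_cfg() can be sanity-checked with a small user-space sketch. The sample rates below (1000 Mbps per lane, 150 MHz pixel clock, 200 MHz sys clock) are assumptions chosen for the example, not values taken from the driver.

    /* Standalone sketch of the DSI2 ratio arithmetic; sample clocks are assumed. */
    #include <stdint.h>
    #include <stdio.h>

    #define DIV_ROUND_CLOSEST_ULL(x, d) (((x) + (d) / 2) / (d))
    #define DIV_ROUND_UP(n, d)          (((n) + (d) - 1) / (d))

    int main(void)
    {
            uint64_t lane_mbps = 1000;              /* assumed per-lane rate */
            uint64_t pixel_clk = 150000000ULL;      /* assumed 150 MHz pixel clock */
            uint64_t sys_clk   = 200000000ULL;      /* assumed 200 MHz sys clock */

            /* D-PHY: phy_hstx_clk is 1/16 of the per-lane high-speed data rate */
            uint64_t phy_hsclk = DIV_ROUND_CLOSEST_ULL(lane_mbps * 1000000ULL, 16);

            /* IPI clock is pixel clock / 4; ratios are stored as Q16 fixed point */
            uint64_t ipi_clk   = pixel_clk / 4;
            uint64_t ipi_ratio = DIV_ROUND_CLOSEST_ULL(phy_hsclk << 16, ipi_clk);
            uint64_t sys_ratio = DIV_ROUND_CLOSEST_ULL(phy_hsclk << 16, sys_clk);

            /* Escape (lptx) clock divider: keep the escape clock at or below 20 MHz */
            uint64_t esc_clk_div = DIV_ROUND_UP(sys_clk / 1000000ULL, 20 * 2);

            printf("phy_hsclk=%llu Hz ipi_ratio=0x%llx sys_ratio=0x%llx esc_div=%llu\n",
                   (unsigned long long)phy_hsclk, (unsigned long long)ipi_ratio,
                   (unsigned long long)sys_ratio, (unsigned long long)esc_clk_div);
            return 0;
    }

With these assumed inputs, phy_hsclk is 62.5 MHz, so the IPI ratio comes out to roughly 1.67 in Q16 and the sys ratio to 0.3125 in Q16, matching the register-field comments above.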
+static void dw_mipi_dsi2_vertical_timing_config(struct dw_mipi_dsi2 *dsi2, + const struct drm_display_mode *mode) +{ + u32 vactive, vsa, vfp, vbp; + + vactive = mode->vdisplay; + vsa = mode->vsync_end - mode->vsync_start; + vfp = mode->vsync_start - mode->vdisplay; + vbp = mode->vtotal - mode->vsync_end; + + regmap_write(dsi2->regmap, DSI2_IPI_VID_VSA_MAN_CFG, VID_VSA_LINES(vsa)); + regmap_write(dsi2->regmap, DSI2_IPI_VID_VBP_MAN_CFG, VID_VBP_LINES(vbp)); + regmap_write(dsi2->regmap, DSI2_IPI_VID_VACT_MAN_CFG, VID_VACT_LINES(vactive)); + regmap_write(dsi2->regmap, DSI2_IPI_VID_VFP_MAN_CFG, VID_VFP_LINES(vfp)); +} + +static void dw_mipi_dsi2_ipi_set(struct dw_mipi_dsi2 *dsi2) +{ + struct drm_display_mode *mode = &dsi2->mode; + u32 hline, hsa, hbp, hact; + u64 hline_time, hsa_time, hbp_time, hact_time, tmp; + u64 pixel_clk, phy_hs_clk; + u16 val; + + val = mode->hdisplay; + + regmap_write(dsi2->regmap, DSI2_IPI_PIX_PKT_CFG, MAX_PIX_PKT(val)); + + dw_mipi_dsi2_ipi_color_coding_cfg(dsi2); + + /* + * if the controller is intended to operate in data stream mode, + * no more steps are required. + */ + if (!(dsi2->mode_flags & MIPI_DSI_MODE_VIDEO)) + return; + + hact = mode->hdisplay; + hsa = mode->hsync_end - mode->hsync_start; + hbp = mode->htotal - mode->hsync_end; + hline = mode->htotal; + + pixel_clk = mode->crtc_clock * MSEC_PER_SEC; + + phy_hs_clk = DIV_ROUND_CLOSEST_ULL(dsi2->lane_mbps * USEC_PER_SEC, 16); + + tmp = hsa * phy_hs_clk; + hsa_time = DIV_ROUND_CLOSEST_ULL(tmp << 16, pixel_clk); + regmap_write(dsi2->regmap, DSI2_IPI_VID_HSA_MAN_CFG, VID_HSA_TIME(hsa_time)); + + tmp = hbp * phy_hs_clk; + hbp_time = DIV_ROUND_CLOSEST_ULL(tmp << 16, pixel_clk); + regmap_write(dsi2->regmap, DSI2_IPI_VID_HBP_MAN_CFG, VID_HBP_TIME(hbp_time)); + + tmp = hact * phy_hs_clk; + hact_time = DIV_ROUND_CLOSEST_ULL(tmp << 16, pixel_clk); + regmap_write(dsi2->regmap, DSI2_IPI_VID_HACT_MAN_CFG, VID_HACT_TIME(hact_time)); + + tmp = hline * phy_hs_clk; + hline_time = DIV_ROUND_CLOSEST_ULL(tmp << 16, pixel_clk); + regmap_write(dsi2->regmap, DSI2_IPI_VID_HLINE_MAN_CFG, VID_HLINE_TIME(hline_time)); + + dw_mipi_dsi2_vertical_timing_config(dsi2, mode); +} + +static void +dw_mipi_dsi2_work_mode(struct dw_mipi_dsi2 *dsi2, u32 mode) +{ + /* + * select controller work in Manual mode + * Manual: MANUAL_MODE_EN + * Automatic: 0 + */ + regmap_write(dsi2->regmap, MANUAL_MODE_CFG, mode); +} + +static int dw_mipi_dsi2_host_attach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct dw_mipi_dsi2 *dsi2 = host_to_dsi2(host); + const struct dw_mipi_dsi2_plat_data *pdata = dsi2->plat_data; + struct drm_bridge *bridge; + int ret; + + if (device->lanes > dsi2->plat_data->max_data_lanes) { + dev_err(dsi2->dev, "the number of data lanes(%u) is too many\n", + device->lanes); + return -EINVAL; + } + + dsi2->lanes = device->lanes; + dsi2->channel = device->channel; + dsi2->format = device->format; + dsi2->mode_flags = device->mode_flags; + + bridge = devm_drm_of_get_bridge(dsi2->dev, dsi2->dev->of_node, 1, 0); + if (IS_ERR(bridge)) + return PTR_ERR(bridge); + + bridge->pre_enable_prev_first = true; + dsi2->panel_bridge = bridge; + + drm_bridge_add(&dsi2->bridge); + + if (pdata->host_ops && pdata->host_ops->attach) { + ret = pdata->host_ops->attach(pdata->priv_data, device); + if (ret < 0) + return ret; + } + + return 0; +} + +static int dw_mipi_dsi2_host_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct dw_mipi_dsi2 *dsi2 = host_to_dsi2(host); + const struct dw_mipi_dsi2_plat_data *pdata 
= dsi2->plat_data; + int ret; + + if (pdata->host_ops && pdata->host_ops->detach) { + ret = pdata->host_ops->detach(pdata->priv_data, device); + if (ret < 0) + return ret; + } + + drm_bridge_remove(&dsi2->bridge); + + drm_of_panel_bridge_remove(host->dev->of_node, 1, 0); + + return 0; +} + +static int dw_mipi_dsi2_gen_pkt_hdr_write(struct dw_mipi_dsi2 *dsi2, + u32 hdr_val, bool lpm) +{ + int ret; + + regmap_write(dsi2->regmap, DSI2_CRI_TX_HDR, hdr_val | CMD_TX_MODE(lpm)); + + ret = cri_fifos_wait_avail(dsi2); + if (ret) { + dev_err(dsi2->dev, "failed to write command header\n"); + return ret; + } + + return 0; +} + +static int dw_mipi_dsi2_write(struct dw_mipi_dsi2 *dsi2, + const struct mipi_dsi_packet *packet, bool lpm) +{ + const u8 *tx_buf = packet->payload; + int len = packet->payload_length, pld_data_bytes = sizeof(u32); + __le32 word; + + /* Send payload */ + while (len) { + if (len < pld_data_bytes) { + word = 0; + memcpy(&word, tx_buf, len); + regmap_write(dsi2->regmap, DSI2_CRI_TX_PLD, le32_to_cpu(word)); + len = 0; + } else { + memcpy(&word, tx_buf, pld_data_bytes); + regmap_write(dsi2->regmap, DSI2_CRI_TX_PLD, le32_to_cpu(word)); + tx_buf += pld_data_bytes; + len -= pld_data_bytes; + } + } + + word = 0; + memcpy(&word, packet->header, sizeof(packet->header)); + return dw_mipi_dsi2_gen_pkt_hdr_write(dsi2, le32_to_cpu(word), lpm); +} + +static int dw_mipi_dsi2_read(struct dw_mipi_dsi2 *dsi2, + const struct mipi_dsi_msg *msg) +{ + u8 *payload = msg->rx_buf; + int i, j, ret, len = msg->rx_len; + u8 data_type; + u16 wc; + u32 val; + + ret = regmap_read_poll_timeout(dsi2->regmap, DSI2_CORE_STATUS, + val, val & CRI_RD_DATA_AVAIL, + 100, CMD_PKT_STATUS_TIMEOUT_US); + if (ret) { + dev_err(dsi2->dev, "CRI has no available read data\n"); + return ret; + } + + regmap_read(dsi2->regmap, DSI2_CRI_RX_HDR, &val); + data_type = val & 0x3f; + + if (mipi_dsi_packet_format_is_short(data_type)) { + for (i = 0; i < len && i < 2; i++) + payload[i] = (val >> (8 * (i + 1))) & 0xff; + + return 0; + } + + wc = (val >> 8) & 0xffff; + /* Receive payload */ + for (i = 0; i < len && i < wc; i += 4) { + regmap_read(dsi2->regmap, DSI2_CRI_RX_PLD, &val); + for (j = 0; j < 4 && j + i < len && j + i < wc; j++) + payload[i + j] = val >> (8 * j); + } + + return 0; +} + +static ssize_t dw_mipi_dsi2_host_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct dw_mipi_dsi2 *dsi2 = host_to_dsi2(host); + bool lpm = msg->flags & MIPI_DSI_MSG_USE_LPM; + struct mipi_dsi_packet packet; + int ret, nb_bytes; + + regmap_update_bits(dsi2->regmap, DSI2_DSI_VID_TX_CFG, + LPDT_DISPLAY_CMD_EN, + lpm ? 
LPDT_DISPLAY_CMD_EN : 0); + + /* create a packet to the DSI protocol */ + ret = mipi_dsi_create_packet(&packet, msg); + if (ret) { + dev_err(dsi2->dev, "failed to create packet: %d\n", ret); + return ret; + } + + ret = cri_fifos_wait_avail(dsi2); + if (ret) + return ret; + + ret = dw_mipi_dsi2_write(dsi2, &packet, lpm); + if (ret) + return ret; + + if (msg->rx_buf && msg->rx_len) { + ret = dw_mipi_dsi2_read(dsi2, msg); + if (ret < 0) + return ret; + nb_bytes = msg->rx_len; + } else { + nb_bytes = packet.size; + } + + return nb_bytes; +} + +static const struct mipi_dsi_host_ops dw_mipi_dsi2_host_ops = { + .attach = dw_mipi_dsi2_host_attach, + .detach = dw_mipi_dsi2_host_detach, + .transfer = dw_mipi_dsi2_host_transfer, +}; + +static u32 * +dw_mipi_dsi2_bridge_atomic_get_input_bus_fmts(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state, + u32 output_fmt, + unsigned int *num_input_fmts) +{ + struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); + const struct dw_mipi_dsi2_plat_data *pdata = dsi2->plat_data; + u32 *input_fmts; + + if (pdata->get_input_bus_fmts) + return pdata->get_input_bus_fmts(pdata->priv_data, + bridge, bridge_state, + crtc_state, conn_state, + output_fmt, num_input_fmts); + + /* Fall back to MEDIA_BUS_FMT_FIXED as the only input format. */ + input_fmts = kmalloc(sizeof(*input_fmts), GFP_KERNEL); + if (!input_fmts) + return NULL; + input_fmts[0] = MEDIA_BUS_FMT_FIXED; + *num_input_fmts = 1; + + return input_fmts; +} + +static int dw_mipi_dsi2_bridge_atomic_check(struct drm_bridge *bridge, + struct drm_bridge_state *bridge_state, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); + const struct dw_mipi_dsi2_plat_data *pdata = dsi2->plat_data; + bool ret; + + bridge_state->input_bus_cfg.flags = + DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE; + + if (pdata->mode_fixup) { + ret = pdata->mode_fixup(pdata->priv_data, &crtc_state->mode, + &crtc_state->adjusted_mode); + if (!ret) { + DRM_DEBUG_DRIVER("failed to fixup mode " DRM_MODE_FMT "\n", + DRM_MODE_ARG(&crtc_state->mode)); + return -EINVAL; + } + } + + return 0; +} + +static void dw_mipi_dsi2_bridge_post_atomic_disable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +{ + struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); + const struct dw_mipi_dsi2_phy_ops *phy_ops = dsi2->plat_data->phy_ops; + + regmap_write(dsi2->regmap, DSI2_IPI_PIX_PKT_CFG, 0); + + /* + * Switch to command mode before panel-bridge post_disable & + * panel unprepare. + * Note: panel-bridge disable & panel disable has been called + * before by the drm framework. 
+ */ + dw_mipi_dsi2_set_cmd_mode(dsi2); + + regmap_write(dsi2->regmap, DSI2_PWR_UP, RESET); + + if (phy_ops->power_off) + phy_ops->power_off(dsi2->plat_data->priv_data); + + clk_disable_unprepare(dsi2->sys_clk); + clk_disable_unprepare(dsi2->pclk); + pm_runtime_put(dsi2->dev); +} + +static unsigned int dw_mipi_dsi2_get_lanes(struct dw_mipi_dsi2 *dsi2) +{ + /* single-dsi, so no other instance to consider */ + return dsi2->lanes; +} + +static void dw_mipi_dsi2_mode_set(struct dw_mipi_dsi2 *dsi2, + const struct drm_display_mode *adjusted_mode) +{ + const struct dw_mipi_dsi2_phy_ops *phy_ops = dsi2->plat_data->phy_ops; + void *priv_data = dsi2->plat_data->priv_data; + u32 lanes = dw_mipi_dsi2_get_lanes(dsi2); + int ret; + + clk_prepare_enable(dsi2->pclk); + clk_prepare_enable(dsi2->sys_clk); + + ret = phy_ops->get_lane_mbps(priv_data, adjusted_mode, dsi2->mode_flags, + lanes, dsi2->format, &dsi2->lane_mbps); + if (ret) + DRM_DEBUG_DRIVER("Phy get_lane_mbps() failed\n"); + + pm_runtime_get_sync(dsi2->dev); + + dw_mipi_dsi2_host_softrst(dsi2); + regmap_write(dsi2->regmap, DSI2_PWR_UP, RESET); + + dw_mipi_dsi2_work_mode(dsi2, MANUAL_MODE_EN); + dw_mipi_dsi2_phy_init(dsi2); + + if (phy_ops->power_on) + phy_ops->power_on(dsi2->plat_data->priv_data); + + dw_mipi_dsi2_tx_option_set(dsi2); + + /* + * initial deskew calibration is send after phy_power_on, + * then we can configure clk_type. + */ + + regmap_update_bits(dsi2->regmap, DSI2_PHY_CLK_CFG, CLK_TYPE_MASK, + dsi2->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS ? NON_CONTINUOUS_CLK : + CONTINUOUS_CLK); + + regmap_write(dsi2->regmap, DSI2_PWR_UP, POWER_UP); + dw_mipi_dsi2_set_cmd_mode(dsi2); + + dw_mipi_dsi2_ipi_set(dsi2); +} + +static void dw_mipi_dsi2_bridge_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +{ + struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); + + /* Power up the dsi ctl into a command mode */ + dw_mipi_dsi2_mode_set(dsi2, &dsi2->mode); +} + +static void dw_mipi_dsi2_bridge_mode_set(struct drm_bridge *bridge, + const struct drm_display_mode *mode, + const struct drm_display_mode *adjusted_mode) +{ + struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); + + /* Store the display mode for later use in pre_enable callback */ + drm_mode_copy(&dsi2->mode, adjusted_mode); +} + +static void dw_mipi_dsi2_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +{ + struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); + + /* Switch to video mode for panel-bridge enable & panel enable */ + if (dsi2->mode_flags & MIPI_DSI_MODE_VIDEO) + dw_mipi_dsi2_set_vid_mode(dsi2); + else + dw_mipi_dsi2_set_data_stream_mode(dsi2); +} + +static enum drm_mode_status +dw_mipi_dsi2_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_info *info, + const struct drm_display_mode *mode) +{ + struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); + const struct dw_mipi_dsi2_plat_data *pdata = dsi2->plat_data; + enum drm_mode_status mode_status = MODE_OK; + + if (pdata->mode_valid) + mode_status = pdata->mode_valid(pdata->priv_data, mode, + dsi2->mode_flags, + dw_mipi_dsi2_get_lanes(dsi2), + dsi2->format); + + return mode_status; +} + +static int dw_mipi_dsi2_bridge_attach(struct drm_bridge *bridge, + enum drm_bridge_attach_flags flags) +{ + struct dw_mipi_dsi2 *dsi2 = bridge_to_dsi2(bridge); + + /* Set the encoder type as caller does not know it */ + bridge->encoder->encoder_type = DRM_MODE_ENCODER_DSI; + + /* Attach the panel-bridge to the dsi bridge */ + return 
drm_bridge_attach(bridge->encoder, dsi2->panel_bridge, bridge, + flags); +} + +static const struct drm_bridge_funcs dw_mipi_dsi2_bridge_funcs = { + .atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_bridge_destroy_state, + .atomic_get_input_bus_fmts = dw_mipi_dsi2_bridge_atomic_get_input_bus_fmts, + .atomic_check = dw_mipi_dsi2_bridge_atomic_check, + .atomic_reset = drm_atomic_helper_bridge_reset, + .atomic_pre_enable = dw_mipi_dsi2_bridge_atomic_pre_enable, + .atomic_enable = dw_mipi_dsi2_bridge_atomic_enable, + .atomic_post_disable = dw_mipi_dsi2_bridge_post_atomic_disable, + .mode_set = dw_mipi_dsi2_bridge_mode_set, + .mode_valid = dw_mipi_dsi2_bridge_mode_valid, + .attach = dw_mipi_dsi2_bridge_attach, +}; + +static const struct regmap_config dw_mipi_dsi2_regmap_config = { + .name = "dsi2-host", + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .fast_io = true, +}; + +static struct dw_mipi_dsi2 * +__dw_mipi_dsi2_probe(struct platform_device *pdev, + const struct dw_mipi_dsi2_plat_data *plat_data) +{ + struct device *dev = &pdev->dev; + struct reset_control *apb_rst; + struct dw_mipi_dsi2 *dsi2; + int ret; + + dsi2 = devm_kzalloc(dev, sizeof(*dsi2), GFP_KERNEL); + if (!dsi2) + return ERR_PTR(-ENOMEM); + + dsi2->dev = dev; + dsi2->plat_data = plat_data; + + if (!plat_data->phy_ops->init || !plat_data->phy_ops->get_lane_mbps || + !plat_data->phy_ops->get_timing) + return dev_err_ptr_probe(dev, -ENODEV, "Phy not properly configured\n"); + + if (!plat_data->regmap) { + void __iomem *base = devm_platform_ioremap_resource(pdev, 0); + + if (IS_ERR(base)) + return dev_err_cast_probe(dev, base, "failed to registers\n"); + + dsi2->regmap = devm_regmap_init_mmio(dev, base, + &dw_mipi_dsi2_regmap_config); + if (IS_ERR(dsi2->regmap)) + return dev_err_cast_probe(dev, dsi2->regmap, "failed to init regmap\n"); + } else { + dsi2->regmap = plat_data->regmap; + } + + dsi2->pclk = devm_clk_get(dev, "pclk"); + if (IS_ERR(dsi2->pclk)) + return dev_err_cast_probe(dev, dsi2->pclk, "Unable to get pclk\n"); + + dsi2->sys_clk = devm_clk_get(dev, "sys"); + if (IS_ERR(dsi2->sys_clk)) + return dev_err_cast_probe(dev, dsi2->sys_clk, "Unable to get sys_clk\n"); + + /* + * Note that the reset was not defined in the initial device tree, so + * we have to be prepared for it not being found. + */ + apb_rst = devm_reset_control_get_optional_exclusive(dev, "apb"); + if (IS_ERR(apb_rst)) + return dev_err_cast_probe(dev, apb_rst, "Unable to get reset control\n"); + + if (apb_rst) { + ret = clk_prepare_enable(dsi2->pclk); + if (ret) { + dev_err(dev, "%s: Failed to enable pclk\n", __func__); + return ERR_PTR(ret); + } + + reset_control_assert(apb_rst); + usleep_range(10, 20); + reset_control_deassert(apb_rst); + + clk_disable_unprepare(dsi2->pclk); + } + + devm_pm_runtime_enable(dev); + + dsi2->dsi_host.ops = &dw_mipi_dsi2_host_ops; + dsi2->dsi_host.dev = dev; + ret = mipi_dsi_host_register(&dsi2->dsi_host); + if (ret) { + dev_err(dev, "Failed to register MIPI host: %d\n", ret); + pm_runtime_disable(dev); + return ERR_PTR(ret); + } + + dsi2->bridge.driver_private = dsi2; + dsi2->bridge.funcs = &dw_mipi_dsi2_bridge_funcs; + dsi2->bridge.of_node = pdev->dev.of_node; + + return dsi2; +} + +static void __dw_mipi_dsi2_remove(struct dw_mipi_dsi2 *dsi2) +{ + mipi_dsi_host_unregister(&dsi2->dsi_host); +} + +/* + * Probe/remove API, used to create the bridge instance. 
+ */ +struct dw_mipi_dsi2 * +dw_mipi_dsi2_probe(struct platform_device *pdev, + const struct dw_mipi_dsi2_plat_data *plat_data) +{ + return __dw_mipi_dsi2_probe(pdev, plat_data); +} +EXPORT_SYMBOL_GPL(dw_mipi_dsi2_probe); + +void dw_mipi_dsi2_remove(struct dw_mipi_dsi2 *dsi2) +{ + __dw_mipi_dsi2_remove(dsi2); +} +EXPORT_SYMBOL_GPL(dw_mipi_dsi2_remove); + +/* + * Bind/unbind API, used from platforms based on the component framework + * to attach the bridge to an encoder. + */ +int dw_mipi_dsi2_bind(struct dw_mipi_dsi2 *dsi2, struct drm_encoder *encoder) +{ + return drm_bridge_attach(encoder, &dsi2->bridge, NULL, 0); +} +EXPORT_SYMBOL_GPL(dw_mipi_dsi2_bind); + +void dw_mipi_dsi2_unbind(struct dw_mipi_dsi2 *dsi2) +{ +} +EXPORT_SYMBOL_GPL(dw_mipi_dsi2_unbind); + +MODULE_AUTHOR("Guochun Huang <hero.huang@rock-chips.com>"); +MODULE_AUTHOR("Heiko Stuebner <heiko.stuebner@cherry.de>"); +MODULE_DESCRIPTION("DW MIPI DSI2 host controller driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:dw-mipi-dsi2"); diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 7275e66faefc..4637bf6ea7a3 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -2587,7 +2587,7 @@ static void tc_remove(struct i2c_client *client) } static const struct i2c_device_id tc358767_i2c_ids[] = { - { "tc358767", 0 }, + { "tc358767" }, { } }; MODULE_DEVICE_TABLE(i2c, tc358767_i2c_ids); diff --git a/drivers/gpu/drm/bridge/tc358768.c b/drivers/gpu/drm/bridge/tc358768.c index 2cb748bbefcd..ec79b0dd0e2c 100644 --- a/drivers/gpu/drm/bridge/tc358768.c +++ b/drivers/gpu/drm/bridge/tc358768.c @@ -1244,8 +1244,8 @@ static const struct regmap_config tc358768_regmap_config = { }; static const struct i2c_device_id tc358768_i2c_ids[] = { - { "tc358768", 0 }, - { "tc358778", 0 }, + { "tc358768" }, + { "tc358778" }, { } }; MODULE_DEVICE_TABLE(i2c, tc358768_i2c_ids); diff --git a/drivers/gpu/drm/bridge/ti-dlpc3433.c b/drivers/gpu/drm/bridge/ti-dlpc3433.c index a0a1b5dd794e..eaec70fa42b6 100644 --- a/drivers/gpu/drm/bridge/ti-dlpc3433.c +++ b/drivers/gpu/drm/bridge/ti-dlpc3433.c @@ -389,7 +389,7 @@ static void dlpc3433_remove(struct i2c_client *client) } static const struct i2c_device_id dlpc3433_id[] = { - { "ti,dlpc3433", 0 }, + { "ti,dlpc3433" }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(i2c, dlpc3433_id); diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi83.c b/drivers/gpu/drm/bridge/ti-sn65dsi83.c index 57a7ed13f996..336380114eea 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi83.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi83.c @@ -132,6 +132,16 @@ #define REG_IRQ_STAT_CHA_SOT_BIT_ERR BIT(2) #define REG_IRQ_STAT_CHA_PLL_UNLOCK BIT(0) +enum sn65dsi83_channel { + CHANNEL_A, + CHANNEL_B +}; + +enum sn65dsi83_lvds_term { + OHM_100, + OHM_200 +}; + enum sn65dsi83_model { MODEL_SN65DSI83, MODEL_SN65DSI84, @@ -147,6 +157,8 @@ struct sn65dsi83 { struct regulator *vcc; bool lvds_dual_link; bool lvds_dual_link_even_odd_swap; + int lvds_vod_swing_conf[2]; + int lvds_term_conf[2]; }; static const struct regmap_range sn65dsi83_readable_ranges[] = { @@ -237,6 +249,36 @@ static const struct regmap_config sn65dsi83_regmap_config = { .max_register = REG_IRQ_STAT, }; +static const int lvds_vod_swing_data_table[2][4][2] = { + { /* 100 Ohm */ + { 180000, 313000 }, + { 215000, 372000 }, + { 250000, 430000 }, + { 290000, 488000 }, + }, + { /* 200 Ohm */ + { 150000, 261000 }, + { 200000, 346000 }, + { 250000, 428000 }, + { 300000, 511000 }, + }, +}; + +static const int lvds_vod_swing_clock_table[2][4][2] 
= { + { /* 100 Ohm */ + { 140000, 244000 }, + { 168000, 290000 }, + { 195000, 335000 }, + { 226000, 381000 }, + }, + { /* 200 Ohm */ + { 117000, 204000 }, + { 156000, 270000 }, + { 195000, 334000 }, + { 234000, 399000 }, + }, +}; + static struct sn65dsi83 *bridge_to_sn65dsi83(struct drm_bridge *bridge) { return container_of(bridge, struct sn65dsi83, bridge); @@ -435,12 +477,16 @@ static void sn65dsi83_atomic_pre_enable(struct drm_bridge *bridge, val |= REG_LVDS_FMT_LVDS_LINK_CFG; regmap_write(ctx->regmap, REG_LVDS_FMT, val); - regmap_write(ctx->regmap, REG_LVDS_VCOM, 0x05); + regmap_write(ctx->regmap, REG_LVDS_VCOM, + REG_LVDS_VCOM_CHA_LVDS_VOD_SWING(ctx->lvds_vod_swing_conf[CHANNEL_A]) | + REG_LVDS_VCOM_CHB_LVDS_VOD_SWING(ctx->lvds_vod_swing_conf[CHANNEL_B])); regmap_write(ctx->regmap, REG_LVDS_LANE, (ctx->lvds_dual_link_even_odd_swap ? REG_LVDS_LANE_EVEN_ODD_SWAP : 0) | - REG_LVDS_LANE_CHA_LVDS_TERM | - REG_LVDS_LANE_CHB_LVDS_TERM); + (ctx->lvds_term_conf[CHANNEL_A] ? + REG_LVDS_LANE_CHA_LVDS_TERM : 0) | + (ctx->lvds_term_conf[CHANNEL_B] ? + REG_LVDS_LANE_CHB_LVDS_TERM : 0)); regmap_write(ctx->regmap, REG_LVDS_CM, 0x00); le16val = cpu_to_le16(mode->hdisplay); @@ -576,10 +622,103 @@ static const struct drm_bridge_funcs sn65dsi83_funcs = { .atomic_get_input_bus_fmts = sn65dsi83_atomic_get_input_bus_fmts, }; +static int sn65dsi83_select_lvds_vod_swing(struct device *dev, + u32 lvds_vod_swing_data[2], u32 lvds_vod_swing_clk[2], u8 lvds_term) +{ + int i; + + for (i = 0; i <= 3; i++) { + if (lvds_vod_swing_data_table[lvds_term][i][0] >= lvds_vod_swing_data[0] && + lvds_vod_swing_data_table[lvds_term][i][1] <= lvds_vod_swing_data[1] && + lvds_vod_swing_clock_table[lvds_term][i][0] >= lvds_vod_swing_clk[0] && + lvds_vod_swing_clock_table[lvds_term][i][1] <= lvds_vod_swing_clk[1]) + return i; + } + + dev_err(dev, "failed to find appropriate LVDS_VOD_SWING configuration\n"); + return -EINVAL; +} + +static int sn65dsi83_parse_lvds_endpoint(struct sn65dsi83 *ctx, int channel) +{ + struct device *dev = ctx->dev; + struct device_node *endpoint; + int endpoint_reg; + /* Set so the property can be freely selected if not defined */ + u32 lvds_vod_swing_data[2] = { 0, 1000000 }; + u32 lvds_vod_swing_clk[2] = { 0, 1000000 }; + /* Set default near end terminataion to 200 Ohm */ + u32 lvds_term = 200; + int lvds_vod_swing_conf; + int ret = 0; + int ret_data; + int ret_clock; + + if (channel == CHANNEL_A) + endpoint_reg = 2; + else + endpoint_reg = 3; + + endpoint = of_graph_get_endpoint_by_regs(dev->of_node, endpoint_reg, -1); + + of_property_read_u32(endpoint, "ti,lvds-termination-ohms", &lvds_term); + if (lvds_term == 100) + ctx->lvds_term_conf[channel] = OHM_100; + else if (lvds_term == 200) + ctx->lvds_term_conf[channel] = OHM_200; + else { + ret = -EINVAL; + goto exit; + } + + ret_data = of_property_read_u32_array(endpoint, "ti,lvds-vod-swing-data-microvolt", + lvds_vod_swing_data, ARRAY_SIZE(lvds_vod_swing_data)); + if (ret_data != 0 && ret_data != -EINVAL) { + ret = ret_data; + goto exit; + } + + ret_clock = of_property_read_u32_array(endpoint, "ti,lvds-vod-swing-clock-microvolt", + lvds_vod_swing_clk, ARRAY_SIZE(lvds_vod_swing_clk)); + if (ret_clock != 0 && ret_clock != -EINVAL) { + ret = ret_clock; + goto exit; + } + + /* Use default value if both properties are NOT defined. */ + if (ret_data == -EINVAL && ret_clock == -EINVAL) + lvds_vod_swing_conf = 0x1; + + /* Use lookup table if any of the two properties is defined. 
*/ + if (!ret_data || !ret_clock) { + lvds_vod_swing_conf = sn65dsi83_select_lvds_vod_swing(dev, lvds_vod_swing_data, + lvds_vod_swing_clk, ctx->lvds_term_conf[channel]); + if (lvds_vod_swing_conf < 0) { + ret = lvds_vod_swing_conf; + goto exit; + } + } + + ctx->lvds_vod_swing_conf[channel] = lvds_vod_swing_conf; + ret = 0; +exit: + of_node_put(endpoint); + return ret; +} + static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model) { struct drm_bridge *panel_bridge; struct device *dev = ctx->dev; + int ret; + + ret = sn65dsi83_parse_lvds_endpoint(ctx, CHANNEL_A); + if (ret < 0) + return ret; + + ret = sn65dsi83_parse_lvds_endpoint(ctx, CHANNEL_B); + if (ret < 0) + return ret; ctx->lvds_dual_link = false; ctx->lvds_dual_link_even_odd_swap = false; @@ -606,7 +745,7 @@ static int sn65dsi83_parse_dt(struct sn65dsi83 *ctx, enum sn65dsi83_model model) panel_bridge = devm_drm_of_get_bridge(dev, dev->of_node, 2, 0); if (IS_ERR(panel_bridge)) - return PTR_ERR(panel_bridge); + return dev_err_probe(dev, PTR_ERR(panel_bridge), "Failed to get panel bridge\n"); ctx->panel_bridge = panel_bridge; @@ -732,7 +871,7 @@ static void sn65dsi83_remove(struct i2c_client *client) drm_bridge_remove(&ctx->bridge); } -static struct i2c_device_id sn65dsi83_id[] = { +static const struct i2c_device_id sn65dsi83_id[] = { { "ti,sn65dsi83", MODEL_SN65DSI83 }, { "ti,sn65dsi84", MODEL_SN65DSI84 }, {}, diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index 9e31f750fd88..e4d9006b59f1 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -1970,9 +1970,9 @@ static int ti_sn65dsi86_probe(struct i2c_client *client) return ti_sn65dsi86_add_aux_device(pdata, &pdata->aux_aux, "aux"); } -static struct i2c_device_id ti_sn65dsi86_id[] = { - { "ti,sn65dsi86", 0}, - {}, +static const struct i2c_device_id ti_sn65dsi86_id[] = { + { "ti,sn65dsi86" }, + {} }; MODULE_DEVICE_TABLE(i2c, ti_sn65dsi86_id); diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c index 107a2c0b96c9..79ab5da827e1 100644 --- a/drivers/gpu/drm/bridge/ti-tfp410.c +++ b/drivers/gpu/drm/bridge/ti-tfp410.c @@ -435,7 +435,7 @@ static void tfp410_i2c_remove(struct i2c_client *client) } static const struct i2c_device_id tfp410_i2c_ids[] = { - { "tfp410", 0 }, + { "tfp410" }, { } }; MODULE_DEVICE_TABLE(i2c, tfp410_i2c_ids); diff --git a/drivers/gpu/drm/clients/Kconfig b/drivers/gpu/drm/clients/Kconfig new file mode 100644 index 000000000000..6096c623d9d5 --- /dev/null +++ b/drivers/gpu/drm/clients/Kconfig @@ -0,0 +1,123 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config DRM_CLIENT_LIB + tristate + depends on DRM + select DRM_KMS_HELPER if DRM_FBDEV_EMULATION + select FB_CORE if DRM_FBDEV_EMULATION + help + This option enables the DRM client library and selects all + modules and components according to the enabled clients. + +config DRM_CLIENT_SELECTION + tristate + depends on DRM + select DRM_CLIENT_LIB if DRM_CLIENT_LOG + select DRM_CLIENT_LIB if DRM_FBDEV_EMULATION + help + Drivers that support in-kernel DRM clients have to select this + option. + +config DRM_CLIENT_SETUP + bool + depends on DRM_CLIENT_SELECTION + help + Enables the DRM client selection. DRM drivers that support the + default clients should select DRM_CLIENT_SELECTION instead. 
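For illustration only (not part of the patch): a minimal sketch of how a driver would opt into the relocated client library. A hypothetical "foo" driver selects DRM_CLIENT_SELECTION in its Kconfig entry, includes the new <drm/clients/drm_client_setup.h> header path introduced by this series, and hands over to drm_client_setup() once the device is registered. The function and identifier names here are made up; only the header path and drm_client_setup() signature come from the patch.

    /* Hypothetical driver code, sketched against the new clients/ layout. */
    #include <drm/clients/drm_client_setup.h>
    #include <drm/drm_drv.h>

    static int foo_register_and_start_clients(struct drm_device *ddev)
    {
            int ret;

            ret = drm_dev_register(ddev, 0);
            if (ret)
                    return ret;

            /* NULL lets the client library pick a suitable default format */
            drm_client_setup(ddev, NULL);
            return 0;
    }

Whichever client was enabled and selected as default (fbdev emulation or the new log client) is then started by the library, with no client-specific code in the driver itself.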
+ +menu "Supported DRM clients" + depends on DRM_CLIENT_SELECTION + +config DRM_FBDEV_EMULATION + bool "Enable legacy fbdev support for your modesetting driver" + depends on DRM_CLIENT_SELECTION + select DRM_CLIENT + select DRM_CLIENT_SETUP + select FRAMEBUFFER_CONSOLE_DETECT_PRIMARY if FRAMEBUFFER_CONSOLE + default FB + help + Choose this option if you have a need for the legacy fbdev + support. Note that this support also provides the linux console + support on top of your modesetting driver. + + If in doubt, say "Y". + +config DRM_FBDEV_OVERALLOC + int "Overallocation of the fbdev buffer" + depends on DRM_FBDEV_EMULATION + default 100 + help + Defines the fbdev buffer overallocation in percent. Default + is 100. Typical values for double buffering will be 200, + triple buffering 300. + +config DRM_FBDEV_LEAK_PHYS_SMEM + bool "Shamelessly allow leaking of fbdev physical address (DANGEROUS)" + depends on DRM_FBDEV_EMULATION && EXPERT + default n + help + In order to keep user-space compatibility, we want in certain + use-cases to keep leaking the fbdev physical address to the + user-space program handling the fbdev buffer. + This affects, not only, Amlogic, Allwinner or Rockchip devices + with ARM Mali GPUs using a userspace Blob. + This option is not supported by upstream developers and should be + removed as soon as possible and be considered as a broken and + legacy behaviour from a modern fbdev device driver. + + Please send any bug reports when using this to your proprietary + software vendor that requires this. + + If in doubt, say "N" or spread the word to your closed source + library vendor. + +config DRM_CLIENT_LOG + bool "Print the kernel boot message on the screen" + depends on DRM_CLIENT_SELECTION + select DRM_CLIENT + select DRM_CLIENT_SETUP + select DRM_DRAW + select FONT_SUPPORT + help + This enable a drm logger, that will print the kernel messages to the + screen until the userspace is ready to take over. + + If you only need logs, but no terminal, or if you prefer userspace + terminal, say "Y". + +choice + prompt "Default DRM Client" + depends on DRM_CLIENT_SELECTION + depends on DRM_FBDEV_EMULATION || DRM_CLIENT_LOG + default DRM_CLIENT_DEFAULT_FBDEV + help + Selects the default drm client. + + The selection made here can be overridden by using the kernel + command line 'drm_client_lib.active=fbdev' option. + +config DRM_CLIENT_DEFAULT_FBDEV + bool "fbdev" + depends on DRM_FBDEV_EMULATION + help + Use fbdev emulation as default drm client. This is needed to have + fbcon on top of a drm driver. + +config DRM_CLIENT_DEFAULT_LOG + bool "log" + depends on DRM_CLIENT_LOG + help + Use drm log as default drm client. This will display boot logs on the + screen, but doesn't implement a full terminal. For that you will need + a userspace terminal using drm/kms. + +endchoice + +config DRM_CLIENT_DEFAULT + string + depends on DRM_CLIENT + default "fbdev" if DRM_CLIENT_DEFAULT_FBDEV + default "log" if DRM_CLIENT_DEFAULT_LOG + default "" + +endmenu diff --git a/drivers/gpu/drm/clients/Makefile b/drivers/gpu/drm/clients/Makefile new file mode 100644 index 000000000000..c16addbc327f --- /dev/null +++ b/drivers/gpu/drm/clients/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 + +subdir-ccflags-y += -I$(src)/.. 
+ +drm_client_lib-y := drm_client_setup.o +drm_client_lib-$(CONFIG_DRM_CLIENT_LOG) += drm_log.o +drm_client_lib-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_client.o +obj-$(CONFIG_DRM_CLIENT_LIB) += drm_client_lib.o diff --git a/drivers/gpu/drm/clients/drm_client_internal.h b/drivers/gpu/drm/clients/drm_client_internal.h new file mode 100644 index 000000000000..6dc078bf6503 --- /dev/null +++ b/drivers/gpu/drm/clients/drm_client_internal.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_CLIENT_INTERNAL_H +#define DRM_CLIENT_INTERNAL_H + +struct drm_device; +struct drm_format_info; + +#ifdef CONFIG_DRM_FBDEV_EMULATION +int drm_fbdev_client_setup(struct drm_device *dev, const struct drm_format_info *format); +#else +static inline int drm_fbdev_client_setup(struct drm_device *dev, + const struct drm_format_info *format) +{ + return 0; +} +#endif + +#ifdef CONFIG_DRM_CLIENT_LOG +void drm_log_register(struct drm_device *dev); +#else +static inline void drm_log_register(struct drm_device *dev) {} +#endif + +#endif diff --git a/drivers/gpu/drm/drm_client_setup.c b/drivers/gpu/drm/clients/drm_client_setup.c index c14221ca5a0d..e17265039ca8 100644 --- a/drivers/gpu/drm/drm_client_setup.c +++ b/drivers/gpu/drm/clients/drm_client_setup.c @@ -1,11 +1,18 @@ // SPDX-License-Identifier: MIT -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_device.h> -#include <drm/drm_fbdev_client.h> #include <drm/drm_fourcc.h> #include <drm/drm_print.h> +#include "drm_client_internal.h" + +static char drm_client_default[16] = CONFIG_DRM_CLIENT_DEFAULT; +module_param_string(active, drm_client_default, sizeof(drm_client_default), 0444); +MODULE_PARM_DESC(active, + "Choose which drm client to start, default is" + CONFIG_DRM_CLIENT_DEFAULT "]"); + /** * drm_client_setup() - Setup in-kernel DRM clients * @dev: DRM device @@ -24,11 +31,26 @@ */ void drm_client_setup(struct drm_device *dev, const struct drm_format_info *format) { - int ret; - ret = drm_fbdev_client_setup(dev, format); - if (ret) - drm_warn(dev, "Failed to set up DRM client; error %d\n", ret); +#ifdef CONFIG_DRM_FBDEV_EMULATION + if (!strcmp(drm_client_default, "fbdev")) { + int ret; + + ret = drm_fbdev_client_setup(dev, format); + if (ret) + drm_warn(dev, "Failed to set up DRM client; error %d\n", ret); + return; + } +#endif + +#ifdef CONFIG_DRM_CLIENT_LOG + if (!strcmp(drm_client_default, "log")) { + drm_log_register(dev); + return; + } +#endif + if (strcmp(drm_client_default, "")) + drm_warn(dev, "Unknown DRM client %s\n", drm_client_default); } EXPORT_SYMBOL(drm_client_setup); diff --git a/drivers/gpu/drm/drm_fbdev_client.c b/drivers/gpu/drm/clients/drm_fbdev_client.c index 246fb63ab250..f894ba52bdb5 100644 --- a/drivers/gpu/drm/drm_fbdev_client.c +++ b/drivers/gpu/drm/clients/drm_fbdev_client.c @@ -3,11 +3,12 @@ #include <drm/drm_client.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> -#include <drm/drm_fbdev_client.h> #include <drm/drm_fb_helper.h> #include <drm/drm_fourcc.h> #include <drm/drm_print.h> +#include "drm_client_internal.h" + /* * struct drm_client_funcs */ @@ -164,4 +165,3 @@ err_drm_client_init: kfree(fb_helper); return ret; } -EXPORT_SYMBOL(drm_fbdev_client_setup); diff --git a/drivers/gpu/drm/clients/drm_log.c b/drivers/gpu/drm/clients/drm_log.c new file mode 100644 index 000000000000..379850c83e51 --- /dev/null +++ b/drivers/gpu/drm/clients/drm_log.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* + * Copyright (c) 2024 Red Hat. 
+ * Author: Jocelyn Falempe <jfalempe@redhat.com> + */ + +#include <linux/console.h> +#include <linux/font.h> +#include <linux/init.h> +#include <linux/iosys-map.h> +#include <linux/module.h> +#include <linux/types.h> + +#include <drm/drm_client.h> +#include <drm/drm_drv.h> +#include <drm/drm_fourcc.h> +#include <drm/drm_framebuffer.h> +#include <drm/drm_print.h> + +#include "drm_client_internal.h" +#include "drm_draw_internal.h" +#include "drm_internal.h" + +MODULE_AUTHOR("Jocelyn Falempe"); +MODULE_DESCRIPTION("DRM boot logger"); +MODULE_LICENSE("GPL"); + +static unsigned int scale = 1; +module_param(scale, uint, 0444); +MODULE_PARM_DESC(scale, "Integer scaling factor for drm_log, default is 1"); + +/** + * DOC: overview + * + * This is a simple graphic logger, to print the kernel message on screen, until + * a userspace application is able to take over. + * It is only for debugging purpose. + */ + +struct drm_log_scanout { + struct drm_client_buffer *buffer; + const struct font_desc *font; + u32 rows; + u32 columns; + u32 scaled_font_h; + u32 scaled_font_w; + u32 line; + u32 format; + u32 px_width; + u32 front_color; + u32 prefix_color; +}; + +struct drm_log { + struct mutex lock; + struct drm_client_dev client; + struct console con; + bool probed; + u32 n_scanout; + struct drm_log_scanout *scanout; +}; + +static struct drm_log *client_to_drm_log(struct drm_client_dev *client) +{ + return container_of(client, struct drm_log, client); +} + +static struct drm_log *console_to_drm_log(struct console *con) +{ + return container_of(con, struct drm_log, con); +} + +static void drm_log_blit(struct iosys_map *dst, unsigned int dst_pitch, + const u8 *src, unsigned int src_pitch, + u32 height, u32 width, u32 px_width, u32 color) +{ + switch (px_width) { + case 2: + drm_draw_blit16(dst, dst_pitch, src, src_pitch, height, width, scale, color); + break; + case 3: + drm_draw_blit24(dst, dst_pitch, src, src_pitch, height, width, scale, color); + break; + case 4: + drm_draw_blit32(dst, dst_pitch, src, src_pitch, height, width, scale, color); + break; + default: + WARN_ONCE(1, "Can't blit with pixel width %d\n", px_width); + } +} + +static void drm_log_clear_line(struct drm_log_scanout *scanout, u32 line) +{ + struct drm_framebuffer *fb = scanout->buffer->fb; + unsigned long height = scanout->scaled_font_h; + struct iosys_map map; + struct drm_rect r = DRM_RECT_INIT(0, line * height, fb->width, height); + + if (drm_client_buffer_vmap_local(scanout->buffer, &map)) + return; + iosys_map_memset(&map, r.y1 * fb->pitches[0], 0, height * fb->pitches[0]); + drm_client_buffer_vunmap_local(scanout->buffer); + drm_client_framebuffer_flush(scanout->buffer, &r); +} + +static void drm_log_draw_line(struct drm_log_scanout *scanout, const char *s, + unsigned int len, unsigned int prefix_len) +{ + struct drm_framebuffer *fb = scanout->buffer->fb; + struct iosys_map map; + const struct font_desc *font = scanout->font; + size_t font_pitch = DIV_ROUND_UP(font->width, 8); + const u8 *src; + u32 px_width = fb->format->cpp[0]; + struct drm_rect r = DRM_RECT_INIT(0, scanout->line * scanout->scaled_font_h, + fb->width, (scanout->line + 1) * scanout->scaled_font_h); + u32 i; + + if (drm_client_buffer_vmap_local(scanout->buffer, &map)) + return; + + iosys_map_incr(&map, r.y1 * fb->pitches[0]); + for (i = 0; i < len && i < scanout->columns; i++) { + u32 color = (i < prefix_len) ? 
scanout->prefix_color : scanout->front_color; + src = drm_draw_get_char_bitmap(font, s[i], font_pitch); + drm_log_blit(&map, fb->pitches[0], src, font_pitch, + scanout->scaled_font_h, scanout->scaled_font_w, + px_width, color); + iosys_map_incr(&map, scanout->scaled_font_w * px_width); + } + + scanout->line++; + if (scanout->line >= scanout->rows) + scanout->line = 0; + drm_client_buffer_vunmap_local(scanout->buffer); + drm_client_framebuffer_flush(scanout->buffer, &r); +} + +static void drm_log_draw_new_line(struct drm_log_scanout *scanout, + const char *s, unsigned int len, unsigned int prefix_len) +{ + if (scanout->line == 0) { + drm_log_clear_line(scanout, 0); + drm_log_clear_line(scanout, 1); + drm_log_clear_line(scanout, 2); + } else if (scanout->line + 2 < scanout->rows) + drm_log_clear_line(scanout, scanout->line + 2); + + drm_log_draw_line(scanout, s, len, prefix_len); +} + +/* + * Depends on print_time() in printk.c + * Timestamp is written with "[%5lu.%06lu]" + */ +#define TS_PREFIX_LEN 13 + +static void drm_log_draw_kmsg_record(struct drm_log_scanout *scanout, + const char *s, unsigned int len) +{ + u32 prefix_len = 0; + + if (len > TS_PREFIX_LEN && s[0] == '[' && s[6] == '.' && s[TS_PREFIX_LEN] == ']') + prefix_len = TS_PREFIX_LEN + 1; + + /* do not print the ending \n character */ + if (s[len - 1] == '\n') + len--; + + while (len > scanout->columns) { + drm_log_draw_new_line(scanout, s, scanout->columns, prefix_len); + s += scanout->columns; + len -= scanout->columns; + prefix_len = 0; + } + if (len) + drm_log_draw_new_line(scanout, s, len, prefix_len); +} + +static u32 drm_log_find_usable_format(struct drm_plane *plane) +{ + int i; + + for (i = 0; i < plane->format_count; i++) + if (drm_draw_color_from_xrgb8888(0xffffff, plane->format_types[i]) != 0) + return plane->format_types[i]; + return DRM_FORMAT_INVALID; +} + +static int drm_log_setup_modeset(struct drm_client_dev *client, + struct drm_mode_set *mode_set, + struct drm_log_scanout *scanout) +{ + struct drm_crtc *crtc = mode_set->crtc; + u32 width = mode_set->mode->hdisplay; + u32 height = mode_set->mode->vdisplay; + u32 format; + + scanout->font = get_default_font(width, height, NULL, NULL); + if (!scanout->font) + return -ENOENT; + + format = drm_log_find_usable_format(crtc->primary); + if (format == DRM_FORMAT_INVALID) + return -EINVAL; + + scanout->buffer = drm_client_framebuffer_create(client, width, height, format); + if (IS_ERR(scanout->buffer)) { + drm_warn(client->dev, "drm_log can't create framebuffer %d %d %p4cc\n", + width, height, &format); + return -ENOMEM; + } + mode_set->fb = scanout->buffer->fb; + scanout->scaled_font_h = scanout->font->height * scale; + scanout->scaled_font_w = scanout->font->width * scale; + scanout->rows = height / scanout->scaled_font_h; + scanout->columns = width / scanout->scaled_font_w; + scanout->front_color = drm_draw_color_from_xrgb8888(0xffffff, format); + scanout->prefix_color = drm_draw_color_from_xrgb8888(0x4e9a06, format); + return 0; +} + +static int drm_log_count_modeset(struct drm_client_dev *client) +{ + struct drm_mode_set *mode_set; + int count = 0; + + mutex_lock(&client->modeset_mutex); + drm_client_for_each_modeset(mode_set, client) + count++; + mutex_unlock(&client->modeset_mutex); + return count; +} + +static void drm_log_init_client(struct drm_log *dlog) +{ + struct drm_client_dev *client = &dlog->client; + struct drm_mode_set *mode_set; + int i, max_modeset; + int n_modeset = 0; + + dlog->probed = true; + + if (drm_client_modeset_probe(client, 0, 0)) + 
return; + + max_modeset = drm_log_count_modeset(client); + if (!max_modeset) + return; + + dlog->scanout = kcalloc(max_modeset, sizeof(*dlog->scanout), GFP_KERNEL); + if (!dlog->scanout) + return; + + mutex_lock(&client->modeset_mutex); + drm_client_for_each_modeset(mode_set, client) { + if (!mode_set->mode) + continue; + if (drm_log_setup_modeset(client, mode_set, &dlog->scanout[n_modeset])) + continue; + n_modeset++; + } + mutex_unlock(&client->modeset_mutex); + if (n_modeset == 0) + goto err_nomodeset; + + if (drm_client_modeset_commit(client)) + goto err_failed_commit; + + dlog->n_scanout = n_modeset; + return; + +err_failed_commit: + for (i = 0; i < n_modeset; i++) + drm_client_framebuffer_delete(dlog->scanout[i].buffer); + +err_nomodeset: + kfree(dlog->scanout); + dlog->scanout = NULL; +} + +static void drm_log_free_scanout(struct drm_client_dev *client) +{ + struct drm_log *dlog = client_to_drm_log(client); + int i; + + if (dlog->n_scanout) { + for (i = 0; i < dlog->n_scanout; i++) + drm_client_framebuffer_delete(dlog->scanout[i].buffer); + dlog->n_scanout = 0; + kfree(dlog->scanout); + dlog->scanout = NULL; + } +} + +static void drm_log_client_unregister(struct drm_client_dev *client) +{ + struct drm_log *dlog = client_to_drm_log(client); + struct drm_device *dev = client->dev; + + unregister_console(&dlog->con); + + mutex_lock(&dlog->lock); + drm_log_free_scanout(client); + drm_client_release(client); + mutex_unlock(&dlog->lock); + kfree(dlog); + drm_dbg(dev, "Unregistered with drm log\n"); +} + +static int drm_log_client_hotplug(struct drm_client_dev *client) +{ + struct drm_log *dlog = client_to_drm_log(client); + + mutex_lock(&dlog->lock); + drm_log_free_scanout(client); + dlog->probed = false; + mutex_unlock(&dlog->lock); + return 0; +} + +static int drm_log_client_suspend(struct drm_client_dev *client, bool _console_lock) +{ + struct drm_log *dlog = client_to_drm_log(client); + + console_stop(&dlog->con); + + return 0; +} + +static int drm_log_client_resume(struct drm_client_dev *client, bool _console_lock) +{ + struct drm_log *dlog = client_to_drm_log(client); + + console_start(&dlog->con); + + return 0; +} + +static const struct drm_client_funcs drm_log_client_funcs = { + .owner = THIS_MODULE, + .unregister = drm_log_client_unregister, + .hotplug = drm_log_client_hotplug, + .suspend = drm_log_client_suspend, + .resume = drm_log_client_resume, +}; + +static void drm_log_write_thread(struct console *con, struct nbcon_write_context *wctxt) +{ + struct drm_log *dlog = console_to_drm_log(con); + int i; + + if (!dlog->probed) + drm_log_init_client(dlog); + + /* Check that we are still the master before drawing */ + if (drm_master_internal_acquire(dlog->client.dev)) { + drm_master_internal_release(dlog->client.dev); + + for (i = 0; i < dlog->n_scanout; i++) + drm_log_draw_kmsg_record(&dlog->scanout[i], wctxt->outbuf, wctxt->len); + } +} + +static void drm_log_lock(struct console *con, unsigned long *flags) +{ + struct drm_log *dlog = console_to_drm_log(con); + + mutex_lock(&dlog->lock); + migrate_disable(); +} + +static void drm_log_unlock(struct console *con, unsigned long flags) +{ + struct drm_log *dlog = console_to_drm_log(con); + + migrate_enable(); + mutex_unlock(&dlog->lock); +} + +static void drm_log_register_console(struct console *con) +{ + strscpy(con->name, "drm_log"); + con->write_thread = drm_log_write_thread; + con->device_lock = drm_log_lock; + con->device_unlock = drm_log_unlock; + con->flags = CON_PRINTBUFFER | CON_NBCON; + con->index = -1; + + 
register_console(con); +} + +/** + * drm_log_register() - Register a drm device to drm_log + * @dev: the drm device to register. + */ +void drm_log_register(struct drm_device *dev) +{ + struct drm_log *new; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + goto err_warn; + + mutex_init(&new->lock); + if (drm_client_init(dev, &new->client, "drm_log", &drm_log_client_funcs)) + goto err_free; + + drm_client_register(&new->client); + + drm_log_register_console(&new->con); + + drm_dbg(dev, "Registered with drm log as %s\n", new->con.name); + return; + +err_free: + kfree(new); +err_warn: + drm_warn(dev, "Failed to register with drm log\n"); +} diff --git a/drivers/gpu/drm/display/drm_bridge_connector.c b/drivers/gpu/drm/display/drm_bridge_connector.c index 320c297008aa..512ced87ea18 100644 --- a/drivers/gpu/drm/display/drm_bridge_connector.c +++ b/drivers/gpu/drm/display/drm_bridge_connector.c @@ -18,6 +18,7 @@ #include <drm/drm_managed.h> #include <drm/drm_modeset_helper_vtables.h> #include <drm/drm_probe_helper.h> +#include <drm/display/drm_hdmi_helper.h> #include <drm/display/drm_hdmi_state_helper.h> /** @@ -299,9 +300,22 @@ static int drm_bridge_connector_get_modes(struct drm_connector *connector) return 0; } +static enum drm_mode_status +drm_bridge_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + + if (bridge_connector->bridge_hdmi) + return drm_hdmi_connector_mode_valid(connector, mode); + + return MODE_OK; +} + static const struct drm_connector_helper_funcs drm_bridge_connector_helper_funcs = { .get_modes = drm_bridge_connector_get_modes, - /* No need for .mode_valid(), the bridges are checked by the core. */ + .mode_valid = drm_bridge_connector_mode_valid, .enable_hpd = drm_bridge_connector_enable_hpd, .disable_hpd = drm_bridge_connector_disable_hpd, }; diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index 6ee51003de3c..da3c8521a7fa 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -22,15 +22,16 @@ #include <linux/backlight.h> #include <linux/delay.h> +#include <linux/dynamic_debug.h> #include <linux/errno.h> #include <linux/i2c.h> #include <linux/init.h> +#include <linux/iopoll.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <linux/string_helpers.h> -#include <linux/dynamic_debug.h> #include <drm/display/drm_dp_helper.h> #include <drm/display/drm_dp_mst_helper.h> @@ -779,6 +780,128 @@ int drm_dp_dpcd_read_phy_link_status(struct drm_dp_aux *aux, } EXPORT_SYMBOL(drm_dp_dpcd_read_phy_link_status); +static int read_payload_update_status(struct drm_dp_aux *aux) +{ + int ret; + u8 status; + + ret = drm_dp_dpcd_readb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status); + if (ret < 0) + return ret; + + return status; +} + +/** + * drm_dp_dpcd_write_payload() - Write Virtual Channel information to payload table + * @aux: DisplayPort AUX channel + * @vcpid: Virtual Channel Payload ID + * @start_time_slot: Starting time slot + * @time_slot_count: Time slot count + * + * Write the Virtual Channel payload allocation table, checking the payload + * update status and retrying as necessary. 
+ * + * Returns: + * 0 on success, negative error otherwise + */ +int drm_dp_dpcd_write_payload(struct drm_dp_aux *aux, + int vcpid, u8 start_time_slot, u8 time_slot_count) +{ + u8 payload_alloc[3], status; + int ret; + int retries = 0; + + drm_dp_dpcd_writeb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, + DP_PAYLOAD_TABLE_UPDATED); + + payload_alloc[0] = vcpid; + payload_alloc[1] = start_time_slot; + payload_alloc[2] = time_slot_count; + + ret = drm_dp_dpcd_write(aux, DP_PAYLOAD_ALLOCATE_SET, payload_alloc, 3); + if (ret != 3) { + drm_dbg_kms(aux->drm_dev, "failed to write payload allocation %d\n", ret); + goto fail; + } + +retry: + ret = drm_dp_dpcd_readb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status); + if (ret < 0) { + drm_dbg_kms(aux->drm_dev, "failed to read payload table status %d\n", ret); + goto fail; + } + + if (!(status & DP_PAYLOAD_TABLE_UPDATED)) { + retries++; + if (retries < 20) { + usleep_range(10000, 20000); + goto retry; + } + drm_dbg_kms(aux->drm_dev, "status not set after read payload table status %d\n", + status); + ret = -EINVAL; + goto fail; + } + ret = 0; +fail: + return ret; +} +EXPORT_SYMBOL(drm_dp_dpcd_write_payload); + +/** + * drm_dp_dpcd_clear_payload() - Clear the entire VC Payload ID table + * @aux: DisplayPort AUX channel + * + * Clear the entire VC Payload ID table. + * + * Returns: 0 on success, negative error code on errors. + */ +int drm_dp_dpcd_clear_payload(struct drm_dp_aux *aux) +{ + return drm_dp_dpcd_write_payload(aux, 0, 0, 0x3f); +} +EXPORT_SYMBOL(drm_dp_dpcd_clear_payload); + +/** + * drm_dp_dpcd_poll_act_handled() - Poll for ACT handled status + * @aux: DisplayPort AUX channel + * @timeout_ms: Timeout in ms + * + * Try waiting for the sink to finish updating its payload table by polling for + * the ACT handled bit of DP_PAYLOAD_TABLE_UPDATE_STATUS for up to @timeout_ms + * milliseconds, defaulting to 3000 ms if 0. + * + * Returns: + * 0 if the ACT was handled in time, negative error code on failure. 
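A hedged sketch of how these exported helpers fit together when allocating an MST payload; the wrapper function is invented for illustration, while the drm_dp_dpcd_*() names, arguments and the 3000 ms default timeout are taken from the hunks above.

	#include <drm/display/drm_dp_helper.h>

	/* Illustrative wrapper only; "aux" is whatever struct drm_dp_aux the driver owns. */
	static int example_set_payload(struct drm_dp_aux *aux, int vcpid,
				       u8 start_slot, u8 num_slots)
	{
		int ret;

		ret = drm_dp_dpcd_write_payload(aux, vcpid, start_slot, num_slots);
		if (ret)
			return ret;

		/* Passing 0 falls back to the default 3000 ms timeout. */
		return drm_dp_dpcd_poll_act_handled(aux, 0);
	}

	/* Resetting the whole table, e.g. on MST teardown: drm_dp_dpcd_clear_payload(aux); */
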
+ */ +int drm_dp_dpcd_poll_act_handled(struct drm_dp_aux *aux, int timeout_ms) +{ + int ret, status; + + /* default to 3 seconds, this is arbitrary */ + timeout_ms = timeout_ms ?: 3000; + + ret = readx_poll_timeout(read_payload_update_status, aux, status, + status & DP_PAYLOAD_ACT_HANDLED || status < 0, + 200, timeout_ms * USEC_PER_MSEC); + if (ret < 0 && status >= 0) { + drm_err(aux->drm_dev, "Failed to get ACT after %d ms, last status: %02x\n", + timeout_ms, status); + return -EINVAL; + } else if (status < 0) { + /* + * Failure here isn't unexpected - the hub may have + * just been unplugged + */ + drm_dbg_kms(aux->drm_dev, "Failed to read payload table status: %d\n", status); + return status; + } + + return 0; +} +EXPORT_SYMBOL(drm_dp_dpcd_poll_act_handled); + static bool is_edid_digital_input_dp(const struct drm_edid *drm_edid) { /* FIXME: get rid of drm_edid_raw() */ diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index dc4446d589e7..f8cd094efa3c 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -29,7 +29,6 @@ #include <linux/random.h> #include <linux/sched.h> #include <linux/seq_file.h> -#include <linux/iopoll.h> #if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) #include <linux/stacktrace.h> @@ -68,9 +67,6 @@ static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr, static void drm_dp_mst_topology_put_port(struct drm_dp_mst_port *port); -static int drm_dp_dpcd_write_payload(struct drm_dp_mst_topology_mgr *mgr, - int id, u8 start_slot, u8 num_slots); - static int drm_dp_send_dpcd_read(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_port *port, int offset, int size, u8 *bytes); @@ -2285,7 +2281,7 @@ drm_dp_mst_port_add_connector(struct drm_dp_mst_branch *mstb, port->cached_edid = drm_edid_read_ddc(port->connector, &port->aux.ddc); - drm_connector_register(port->connector); + drm_connector_dynamic_register(port->connector); return; error: @@ -3267,7 +3263,7 @@ EXPORT_SYMBOL(drm_dp_send_query_stream_enc_status); static int drm_dp_create_payload_at_dfp(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_atomic_payload *payload) { - return drm_dp_dpcd_write_payload(mgr, payload->vcpi, payload->vc_start_slot, + return drm_dp_dpcd_write_payload(mgr->aux, payload->vcpi, payload->vc_start_slot, payload->time_slots); } @@ -3298,7 +3294,7 @@ static void drm_dp_destroy_payload_at_remote_and_dfp(struct drm_dp_mst_topology_ } if (payload->payload_allocation_status == DRM_DP_MST_PAYLOAD_ALLOCATION_DFP) - drm_dp_dpcd_write_payload(mgr, payload->vcpi, payload->vc_start_slot, 0); + drm_dp_dpcd_write_payload(mgr->aux, payload->vcpi, payload->vc_start_slot, 0); } /** @@ -3686,7 +3682,7 @@ int drm_dp_mst_topology_mgr_set_mst(struct drm_dp_mst_topology_mgr *mgr, bool ms goto out_unlock; /* Write reset payload */ - drm_dp_dpcd_write_payload(mgr, 0, 0, 0x3f); + drm_dp_dpcd_clear_payload(mgr->aux); drm_dp_mst_queue_probe_work(mgr); @@ -4747,61 +4743,6 @@ void drm_dp_mst_update_slots(struct drm_dp_mst_topology_state *mst_state, uint8_ } EXPORT_SYMBOL(drm_dp_mst_update_slots); -static int drm_dp_dpcd_write_payload(struct drm_dp_mst_topology_mgr *mgr, - int id, u8 start_slot, u8 num_slots) -{ - u8 payload_alloc[3], status; - int ret; - int retries = 0; - - drm_dp_dpcd_writeb(mgr->aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, - DP_PAYLOAD_TABLE_UPDATED); - - payload_alloc[0] = id; - payload_alloc[1] = start_slot; - payload_alloc[2] = num_slots; - - ret = 
drm_dp_dpcd_write(mgr->aux, DP_PAYLOAD_ALLOCATE_SET, payload_alloc, 3); - if (ret != 3) { - drm_dbg_kms(mgr->dev, "failed to write payload allocation %d\n", ret); - goto fail; - } - -retry: - ret = drm_dp_dpcd_readb(mgr->aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status); - if (ret < 0) { - drm_dbg_kms(mgr->dev, "failed to read payload table status %d\n", ret); - goto fail; - } - - if (!(status & DP_PAYLOAD_TABLE_UPDATED)) { - retries++; - if (retries < 20) { - usleep_range(10000, 20000); - goto retry; - } - drm_dbg_kms(mgr->dev, "status not set after read payload table status %d\n", - status); - ret = -EINVAL; - goto fail; - } - ret = 0; -fail: - return ret; -} - -static int do_get_act_status(struct drm_dp_aux *aux) -{ - int ret; - u8 status; - - ret = drm_dp_dpcd_readb(aux, DP_PAYLOAD_TABLE_UPDATE_STATUS, &status); - if (ret < 0) - return ret; - - return status; -} - /** * drm_dp_check_act_status() - Polls for ACT handled status. * @mgr: manager to use @@ -4819,28 +4760,9 @@ int drm_dp_check_act_status(struct drm_dp_mst_topology_mgr *mgr) * There doesn't seem to be any recommended retry count or timeout in * the MST specification. Since some hubs have been observed to take * over 1 second to update their payload allocations under certain - * conditions, we use a rather large timeout value. + * conditions, we use a rather large timeout value of 3 seconds. */ - const int timeout_ms = 3000; - int ret, status; - - ret = readx_poll_timeout(do_get_act_status, mgr->aux, status, - status & DP_PAYLOAD_ACT_HANDLED || status < 0, - 200, timeout_ms * USEC_PER_MSEC); - if (ret < 0 && status >= 0) { - drm_err(mgr->dev, "Failed to get ACT after %dms, last status: %02x\n", - timeout_ms, status); - return -EINVAL; - } else if (status < 0) { - /* - * Failure here isn't unexpected - the hub may have - * just been unplugged - */ - drm_dbg_kms(mgr->dev, "Failed to read payload table status: %d\n", status); - return status; - } - - return 0; + return drm_dp_dpcd_poll_act_handled(mgr->aux, 3000); } EXPORT_SYMBOL(drm_dp_check_act_status); diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index feb7a3a75981..d678c635a693 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -347,6 +347,8 @@ static int hdmi_generate_avi_infoframe(const struct drm_connector *connector, is_limited_range ? 
HDMI_QUANTIZATION_RANGE_LIMITED : HDMI_QUANTIZATION_RANGE_FULL; int ret; + infoframe->set = false; + ret = drm_hdmi_avi_infoframe_from_display_mode(frame, connector, mode); if (ret) return ret; @@ -376,6 +378,8 @@ static int hdmi_generate_spd_infoframe(const struct drm_connector *connector, &infoframe->data.spd; int ret; + infoframe->set = false; + ret = hdmi_spd_infoframe_init(frame, connector->hdmi.vendor, connector->hdmi.product); @@ -398,6 +402,8 @@ static int hdmi_generate_hdr_infoframe(const struct drm_connector *connector, &infoframe->data.drm; int ret; + infoframe->set = false; + if (connector->max_bpc < 10) return 0; @@ -425,6 +431,8 @@ static int hdmi_generate_hdmi_vendor_infoframe(const struct drm_connector *conne &infoframe->data.vendor.hdmi; int ret; + infoframe->set = false; + if (!info->has_hdmi_infoframe) return 0; @@ -521,6 +529,27 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, } EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_check); +/** + * drm_hdmi_connector_mode_valid() - Check if mode is valid for HDMI connector + * @connector: DRM connector to validate the mode + * @mode: Display mode to validate + * + * Generic .mode_valid implementation for HDMI connectors. + */ +enum drm_mode_status +drm_hdmi_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + unsigned long long clock; + + clock = drm_hdmi_compute_mode_clock(mode, 8, HDMI_COLORSPACE_RGB); + if (!clock) + return MODE_ERROR; + + return hdmi_clock_valid(connector, mode, clock); +} +EXPORT_SYMBOL(drm_hdmi_connector_mode_valid); + static int clear_device_infoframe(struct drm_connector *connector, enum hdmi_infoframe_type type) { diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index fc35f47e2849..ae6e71305f30 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -218,11 +218,11 @@ void drm_connector_free_work_fn(struct work_struct *work) } } -static int __drm_connector_init(struct drm_device *dev, - struct drm_connector *connector, - const struct drm_connector_funcs *funcs, - int connector_type, - struct i2c_adapter *ddc) +static int drm_connector_init_only(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc) { struct drm_mode_config *config = &dev->mode_config; int ret; @@ -273,10 +273,12 @@ static int __drm_connector_init(struct drm_device *dev, /* provide ddc symlink in sysfs */ connector->ddc = ddc; + INIT_LIST_HEAD(&connector->head); INIT_LIST_HEAD(&connector->global_connector_list_entry); INIT_LIST_HEAD(&connector->probed_modes); INIT_LIST_HEAD(&connector->modes); mutex_init(&connector->mutex); + mutex_init(&connector->eld_mutex); mutex_init(&connector->edid_override_mutex); mutex_init(&connector->hdmi.infoframes.lock); connector->edid_blob_ptr = NULL; @@ -288,14 +290,6 @@ static int __drm_connector_init(struct drm_device *dev, drm_connector_get_cmdline_mode(connector); - /* We should add connectors at the end to avoid upsetting the connector - * index too much. 
- */ - spin_lock_irq(&config->connector_list_lock); - list_add_tail(&connector->head, &config->connector_list); - config->num_connector++; - spin_unlock_irq(&config->connector_list_lock); - if (connector_type != DRM_MODE_CONNECTOR_VIRTUAL && connector_type != DRM_MODE_CONNECTOR_WRITEBACK) drm_connector_attach_edid_property(connector); @@ -332,6 +326,54 @@ out_put: return ret; } +static void drm_connector_add(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct drm_mode_config *config = &dev->mode_config; + + if (drm_WARN_ON(dev, !list_empty(&connector->head))) + return; + + spin_lock_irq(&config->connector_list_lock); + list_add_tail(&connector->head, &config->connector_list); + config->num_connector++; + spin_unlock_irq(&config->connector_list_lock); +} + +static void drm_connector_remove(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + + /* + * For dynamic connectors drm_connector_cleanup() can call this function + * before the connector is registered and added to the list. + */ + if (list_empty(&connector->head)) + return; + + spin_lock_irq(&dev->mode_config.connector_list_lock); + list_del_init(&connector->head); + dev->mode_config.num_connector--; + spin_unlock_irq(&dev->mode_config.connector_list_lock); +} + +static int drm_connector_init_and_add(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc) +{ + int ret; + + ret = drm_connector_init_only(dev, connector, funcs, connector_type, ddc); + if (ret) + return ret; + + drm_connector_add(connector); + + return 0; +} + /** * drm_connector_init - Init a preallocated connector * @dev: DRM device @@ -361,11 +403,52 @@ int drm_connector_init(struct drm_device *dev, if (drm_WARN_ON(dev, !(funcs && funcs->destroy))) return -EINVAL; - return __drm_connector_init(dev, connector, funcs, connector_type, NULL); + return drm_connector_init_and_add(dev, connector, funcs, connector_type, NULL); } EXPORT_SYMBOL(drm_connector_init); /** + * drm_connector_dynamic_init - Init a preallocated dynamic connector + * @dev: DRM device + * @connector: the connector to init + * @funcs: callbacks for this connector + * @connector_type: user visible type of the connector + * @ddc: pointer to the associated ddc adapter + * + * Initialises a preallocated dynamic connector. Connectors should be + * subclassed as part of driver connector objects. The connector + * structure should not be allocated with devm_kzalloc(). + * + * Drivers should call this for dynamic connectors which can be hotplugged + * after drm_dev_register() has been called already, e.g. DP MST connectors. + * For all other - static - connectors, drivers should call one of the + * drm_connector_init*()/drmm_connector_init*() functions. + * + * After calling this function the drivers must call + * drm_connector_dynamic_register(). + * + * To remove the connector the driver must call drm_connector_unregister() + * followed by drm_connector_put(). Putting the last reference will call the + * driver's &drm_connector_funcs.destroy hook, which in turn must call + * drm_connector_cleanup() and free the connector structure. + * + * Returns: + * Zero on success, error code on failure. 
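To make the dynamic-connector lifecycle documented here concrete, a hedged sketch from init to teardown; my_connector_* below is a placeholder, but the call sequence (dynamic_init, dynamic_register, then unregister + put, with ->destroy ending in drm_connector_cleanup()) is the one this kernel-doc describes.

	#include <linux/slab.h>
	#include <drm/drm_connector.h>

	static void my_connector_destroy(struct drm_connector *connector)
	{
		drm_connector_cleanup(connector);
		kfree(connector);
	}

	static const struct drm_connector_funcs my_connector_funcs = {
		.destroy = my_connector_destroy,
	};

	static int example_add_dynamic_connector(struct drm_device *dev)
	{
		struct drm_connector *connector;
		int ret;

		connector = kzalloc(sizeof(*connector), GFP_KERNEL);
		if (!connector)
			return -ENOMEM;

		ret = drm_connector_dynamic_init(dev, connector, &my_connector_funcs,
						 DRM_MODE_CONNECTOR_DisplayPort, NULL);
		if (ret) {
			kfree(connector);
			return ret;
		}

		/* ...fill in modes/properties, then, once the connector is usable: */
		return drm_connector_dynamic_register(connector);
	}

	/* Removal: drm_connector_unregister(connector); drm_connector_put(connector); */
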
+ */ +int drm_connector_dynamic_init(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc) +{ + if (drm_WARN_ON(dev, !(funcs && funcs->destroy))) + return -EINVAL; + + return drm_connector_init_only(dev, connector, funcs, connector_type, ddc); +} +EXPORT_SYMBOL(drm_connector_dynamic_init); + +/** * drm_connector_init_with_ddc - Init a preallocated connector * @dev: DRM device * @connector: the connector to init @@ -398,7 +481,7 @@ int drm_connector_init_with_ddc(struct drm_device *dev, if (drm_WARN_ON(dev, !(funcs && funcs->destroy))) return -EINVAL; - return __drm_connector_init(dev, connector, funcs, connector_type, ddc); + return drm_connector_init_and_add(dev, connector, funcs, connector_type, ddc); } EXPORT_SYMBOL(drm_connector_init_with_ddc); @@ -442,7 +525,7 @@ int drmm_connector_init(struct drm_device *dev, if (drm_WARN_ON(dev, funcs && funcs->destroy)) return -EINVAL; - ret = __drm_connector_init(dev, connector, funcs, connector_type, ddc); + ret = drm_connector_init_and_add(dev, connector, funcs, connector_type, ddc); if (ret) return ret; @@ -659,10 +742,8 @@ void drm_connector_cleanup(struct drm_connector *connector) connector->name = NULL; fwnode_handle_put(connector->fwnode); connector->fwnode = NULL; - spin_lock_irq(&dev->mode_config.connector_list_lock); - list_del(&connector->head); - dev->mode_config.num_connector--; - spin_unlock_irq(&dev->mode_config.connector_list_lock); + + drm_connector_remove(connector); WARN_ON(connector->state && !connector->funcs->atomic_destroy_state); if (connector->state && connector->funcs->atomic_destroy_state) @@ -683,14 +764,17 @@ EXPORT_SYMBOL(drm_connector_cleanup); * drm_connector_register - register a connector * @connector: the connector to register * - * Register userspace interfaces for a connector. Only call this for connectors - * which can be hotplugged after drm_dev_register() has been called already, - * e.g. DP MST connectors. All other connectors will be registered automatically - * when calling drm_dev_register(). + * Register userspace interfaces for a connector. Drivers shouldn't call this + * function. Static connectors will be registered automatically by DRM core + * from drm_dev_register(), dynamic connectors (MST) should be registered by + * drivers calling drm_connector_dynamic_register(). * * When the connector is no longer available, callers must call * drm_connector_unregister(). * + * Note: Existing uses of this function in drivers should be a nop already and + * are scheduled to be removed. + * * Returns: * Zero on success, error code on failure. */ @@ -750,12 +834,43 @@ unlock: EXPORT_SYMBOL(drm_connector_register); /** + * drm_connector_dynamic_register - register a dynamic connector + * @connector: the connector to register + * + * Register userspace interfaces for a connector. Only call this for connectors + * initialized by calling drm_connector_dynamic_init(). All other connectors + * will be registered automatically when calling drm_dev_register(). + * + * When the connector is no longer available the driver must call + * drm_connector_unregister(). + * + * Returns: + * Zero on success, error code on failure. + */ +int drm_connector_dynamic_register(struct drm_connector *connector) +{ + /* Was the connector inited already? 
*/ + if (WARN_ON(!(connector->funcs && connector->funcs->destroy))) + return -EINVAL; + + drm_connector_add(connector); + + return drm_connector_register(connector); +} +EXPORT_SYMBOL(drm_connector_dynamic_register); + +/** * drm_connector_unregister - unregister a connector * @connector: the connector to unregister * - * Unregister userspace interfaces for a connector. Only call this for - * connectors which have been registered explicitly by calling - * drm_connector_register(). + * Unregister userspace interfaces for a connector. Drivers should call this + * for dynamic connectors (MST) only, which were registered explicitly by + * calling drm_connector_dynamic_register(). All other - static - connectors + * will be unregistered automatically by DRM core and drivers shouldn't call + * this function for those. + * + * Note: Existing uses of this function in drivers for static connectors + * should be a nop already and are scheduled to be removed. */ void drm_connector_unregister(struct drm_connector *connector) { diff --git a/drivers/gpu/drm/drm_draw.c b/drivers/gpu/drm/drm_draw.c new file mode 100644 index 000000000000..cb2ad12bce57 --- /dev/null +++ b/drivers/gpu/drm/drm_draw.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 or MIT +/* + * Copyright (c) 2023 Red Hat. + * Author: Jocelyn Falempe <jfalempe@redhat.com> + */ + +#include <linux/bits.h> +#include <linux/iosys-map.h> +#include <linux/types.h> + +#include <drm/drm_fourcc.h> + +#include "drm_draw_internal.h" + +/* + * Conversions from xrgb8888 + */ + +static u16 convert_xrgb8888_to_rgb565(u32 pix) +{ + return ((pix & 0x00F80000) >> 8) | + ((pix & 0x0000FC00) >> 5) | + ((pix & 0x000000F8) >> 3); +} + +static u16 convert_xrgb8888_to_rgba5551(u32 pix) +{ + return ((pix & 0x00f80000) >> 8) | + ((pix & 0x0000f800) >> 5) | + ((pix & 0x000000f8) >> 2) | + BIT(0); /* set alpha bit */ +} + +static u16 convert_xrgb8888_to_xrgb1555(u32 pix) +{ + return ((pix & 0x00f80000) >> 9) | + ((pix & 0x0000f800) >> 6) | + ((pix & 0x000000f8) >> 3); +} + +static u16 convert_xrgb8888_to_argb1555(u32 pix) +{ + return BIT(15) | /* set alpha bit */ + ((pix & 0x00f80000) >> 9) | + ((pix & 0x0000f800) >> 6) | + ((pix & 0x000000f8) >> 3); +} + +static u32 convert_xrgb8888_to_argb8888(u32 pix) +{ + return pix | GENMASK(31, 24); /* fill alpha bits */ +} + +static u32 convert_xrgb8888_to_xbgr8888(u32 pix) +{ + return ((pix & 0x00ff0000) >> 16) << 0 | + ((pix & 0x0000ff00) >> 8) << 8 | + ((pix & 0x000000ff) >> 0) << 16 | + ((pix & 0xff000000) >> 24) << 24; +} + +static u32 convert_xrgb8888_to_abgr8888(u32 pix) +{ + return ((pix & 0x00ff0000) >> 16) << 0 | + ((pix & 0x0000ff00) >> 8) << 8 | + ((pix & 0x000000ff) >> 0) << 16 | + GENMASK(31, 24); /* fill alpha bits */ +} + +static u32 convert_xrgb8888_to_xrgb2101010(u32 pix) +{ + pix = ((pix & 0x000000FF) << 2) | + ((pix & 0x0000FF00) << 4) | + ((pix & 0x00FF0000) << 6); + return pix | ((pix >> 8) & 0x00300C03); +} + +static u32 convert_xrgb8888_to_argb2101010(u32 pix) +{ + pix = ((pix & 0x000000FF) << 2) | + ((pix & 0x0000FF00) << 4) | + ((pix & 0x00FF0000) << 6); + return GENMASK(31, 30) /* set alpha bits */ | pix | ((pix >> 8) & 0x00300C03); +} + +static u32 convert_xrgb8888_to_abgr2101010(u32 pix) +{ + pix = ((pix & 0x00FF0000) >> 14) | + ((pix & 0x0000FF00) << 4) | + ((pix & 0x000000FF) << 22); + return GENMASK(31, 30) /* set alpha bits */ | pix | ((pix >> 8) & 0x00300C03); +} + +/** + * drm_draw_color_from_xrgb8888 - convert one pixel from xrgb8888 to the desired format + * @color: input color, in 
xrgb8888 format + * @format: output format + * + * Returns: + * Color in the format specified, casted to u32. + * Or 0 if the format is not supported. + */ +u32 drm_draw_color_from_xrgb8888(u32 color, u32 format) +{ + switch (format) { + case DRM_FORMAT_RGB565: + return convert_xrgb8888_to_rgb565(color); + case DRM_FORMAT_RGBA5551: + return convert_xrgb8888_to_rgba5551(color); + case DRM_FORMAT_XRGB1555: + return convert_xrgb8888_to_xrgb1555(color); + case DRM_FORMAT_ARGB1555: + return convert_xrgb8888_to_argb1555(color); + case DRM_FORMAT_RGB888: + case DRM_FORMAT_XRGB8888: + return color; + case DRM_FORMAT_ARGB8888: + return convert_xrgb8888_to_argb8888(color); + case DRM_FORMAT_XBGR8888: + return convert_xrgb8888_to_xbgr8888(color); + case DRM_FORMAT_ABGR8888: + return convert_xrgb8888_to_abgr8888(color); + case DRM_FORMAT_XRGB2101010: + return convert_xrgb8888_to_xrgb2101010(color); + case DRM_FORMAT_ARGB2101010: + return convert_xrgb8888_to_argb2101010(color); + case DRM_FORMAT_ABGR2101010: + return convert_xrgb8888_to_abgr2101010(color); + default: + WARN_ONCE(1, "Can't convert to %p4cc\n", &format); + return 0; + } +} +EXPORT_SYMBOL(drm_draw_color_from_xrgb8888); + +/* + * Blit functions + */ +void drm_draw_blit16(struct iosys_map *dmap, unsigned int dpitch, + const u8 *sbuf8, unsigned int spitch, + unsigned int height, unsigned int width, + unsigned int scale, u16 fg16) +{ + unsigned int y, x; + + for (y = 0; y < height; y++) + for (x = 0; x < width; x++) + if (drm_draw_is_pixel_fg(sbuf8, spitch, x / scale, y / scale)) + iosys_map_wr(dmap, y * dpitch + x * sizeof(u16), u16, fg16); +} +EXPORT_SYMBOL(drm_draw_blit16); + +void drm_draw_blit24(struct iosys_map *dmap, unsigned int dpitch, + const u8 *sbuf8, unsigned int spitch, + unsigned int height, unsigned int width, + unsigned int scale, u32 fg32) +{ + unsigned int y, x; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + u32 off = y * dpitch + x * 3; + + if (drm_draw_is_pixel_fg(sbuf8, spitch, x / scale, y / scale)) { + /* write blue-green-red to output in little endianness */ + iosys_map_wr(dmap, off, u8, (fg32 & 0x000000FF) >> 0); + iosys_map_wr(dmap, off + 1, u8, (fg32 & 0x0000FF00) >> 8); + iosys_map_wr(dmap, off + 2, u8, (fg32 & 0x00FF0000) >> 16); + } + } + } +} +EXPORT_SYMBOL(drm_draw_blit24); + +void drm_draw_blit32(struct iosys_map *dmap, unsigned int dpitch, + const u8 *sbuf8, unsigned int spitch, + unsigned int height, unsigned int width, + unsigned int scale, u32 fg32) +{ + unsigned int y, x; + + for (y = 0; y < height; y++) + for (x = 0; x < width; x++) + if (drm_draw_is_pixel_fg(sbuf8, spitch, x / scale, y / scale)) + iosys_map_wr(dmap, y * dpitch + x * sizeof(u32), u32, fg32); +} +EXPORT_SYMBOL(drm_draw_blit32); + +/* + * Fill functions + */ +void drm_draw_fill16(struct iosys_map *dmap, unsigned int dpitch, + unsigned int height, unsigned int width, + u16 color) +{ + unsigned int y, x; + + for (y = 0; y < height; y++) + for (x = 0; x < width; x++) + iosys_map_wr(dmap, y * dpitch + x * sizeof(u16), u16, color); +} +EXPORT_SYMBOL(drm_draw_fill16); + +void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, + unsigned int height, unsigned int width, + u16 color) +{ + unsigned int y, x; + + for (y = 0; y < height; y++) { + for (x = 0; x < width; x++) { + unsigned int off = y * dpitch + x * 3; + + /* write blue-green-red to output in little endianness */ + iosys_map_wr(dmap, off, u8, (color & 0x000000FF) >> 0); + iosys_map_wr(dmap, off + 1, u8, (color & 0x0000FF00) >> 8); + iosys_map_wr(dmap, off 
+ 2, u8, (color & 0x00FF0000) >> 16); + } + } +} +EXPORT_SYMBOL(drm_draw_fill24); + +void drm_draw_fill32(struct iosys_map *dmap, unsigned int dpitch, + unsigned int height, unsigned int width, + u32 color) +{ + unsigned int y, x; + + for (y = 0; y < height; y++) + for (x = 0; x < width; x++) + iosys_map_wr(dmap, y * dpitch + x * sizeof(u32), u32, color); +} +EXPORT_SYMBOL(drm_draw_fill32); diff --git a/drivers/gpu/drm/drm_draw_internal.h b/drivers/gpu/drm/drm_draw_internal.h new file mode 100644 index 000000000000..f121ee7339dc --- /dev/null +++ b/drivers/gpu/drm/drm_draw_internal.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 or MIT */ +/* + * Copyright (c) 2023 Red Hat. + * Author: Jocelyn Falempe <jfalempe@redhat.com> + */ + +#ifndef __DRM_DRAW_INTERNAL_H__ +#define __DRM_DRAW_INTERNAL_H__ + +#include <linux/font.h> +#include <linux/types.h> + +struct iosys_map; + +/* check if the pixel at coord x,y is 1 (foreground) or 0 (background) */ +static inline bool drm_draw_is_pixel_fg(const u8 *sbuf8, unsigned int spitch, int x, int y) +{ + return (sbuf8[(y * spitch) + x / 8] & (0x80 >> (x % 8))) != 0; +} + +static inline const u8 *drm_draw_get_char_bitmap(const struct font_desc *font, + char c, size_t font_pitch) +{ + return font->data + (c * font->height) * font_pitch; +} + +u32 drm_draw_color_from_xrgb8888(u32 color, u32 format); + +void drm_draw_blit16(struct iosys_map *dmap, unsigned int dpitch, + const u8 *sbuf8, unsigned int spitch, + unsigned int height, unsigned int width, + unsigned int scale, u16 fg16); + +void drm_draw_blit24(struct iosys_map *dmap, unsigned int dpitch, + const u8 *sbuf8, unsigned int spitch, + unsigned int height, unsigned int width, + unsigned int scale, u32 fg32); + +void drm_draw_blit32(struct iosys_map *dmap, unsigned int dpitch, + const u8 *sbuf8, unsigned int spitch, + unsigned int height, unsigned int width, + unsigned int scale, u32 fg32); + +void drm_draw_fill16(struct iosys_map *dmap, unsigned int dpitch, + unsigned int height, unsigned int width, + u16 color); + +void drm_draw_fill24(struct iosys_map *dmap, unsigned int dpitch, + unsigned int height, unsigned int width, + u16 color); + +void drm_draw_fill32(struct iosys_map *dmap, unsigned int dpitch, + unsigned int height, unsigned int width, + u32 color); + +#endif /* __DRM_DRAW_INTERNAL_H__ */ diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 855beafb76ff..13bc4c290b17 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5605,7 +5605,9 @@ EXPORT_SYMBOL(drm_edid_get_monitor_name); static void clear_eld(struct drm_connector *connector) { + mutex_lock(&connector->eld_mutex); memset(connector->eld, 0, sizeof(connector->eld)); + mutex_unlock(&connector->eld_mutex); connector->latency_present[0] = false; connector->latency_present[1] = false; @@ -5657,6 +5659,8 @@ static void drm_edid_to_eld(struct drm_connector *connector, if (!drm_edid) return; + mutex_lock(&connector->eld_mutex); + mnl = get_monitor_name(drm_edid, &eld[DRM_ELD_MONITOR_NAME_STRING]); drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] ELD monitor %s\n", connector->base.id, connector->name, @@ -5717,6 +5721,8 @@ static void drm_edid_to_eld(struct drm_connector *connector, drm_dbg_kms(connector->dev, "[CONNECTOR:%d:%s] ELD size %d, SAD count %d\n", connector->base.id, connector->name, drm_eld_size(eld), total_sad_count); + + mutex_unlock(&connector->eld_mutex); } static int _drm_edid_to_sad(const struct drm_edid *drm_edid, diff --git a/drivers/gpu/drm/drm_mode_config.c 
b/drivers/gpu/drm/drm_mode_config.c index 37d2e0a4ef4b..8642a2fb25a9 100644 --- a/drivers/gpu/drm/drm_mode_config.c +++ b/drivers/gpu/drm/drm_mode_config.c @@ -150,6 +150,15 @@ int drm_mode_getresources(struct drm_device *dev, void *data, drm_connector_list_iter_begin(dev, &conn_iter); count = 0; connector_id = u64_to_user_ptr(card_res->connector_id_ptr); + /* + * FIXME: the connectors on the list may not be fully initialized yet, + * if the ioctl is called before the connectors are registered. (See + * drm_dev_register()->drm_modeset_register_all() for static and + * drm_connector_dynamic_register() for dynamic connectors.) + * The driver should only get registered after static connectors are + * fully initialized and dynamic connectors should be added to the + * connector list only after fully initializing them. + */ drm_for_each_connector_iter(connector, &conn_iter) { /* only expose writeback connectors if userspace understands them */ if (!file_priv->writeback_connectors && diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 6ba167a33461..513c04f74420 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1282,8 +1282,7 @@ EXPORT_SYMBOL(drm_mode_set_name); * @mode: mode * * Returns: - * @modes's vrefresh rate in Hz, rounded to the nearest integer. Calculates the - * value first if it is not yet set. + * @modes's vrefresh rate in Hz, rounded to the nearest integer. */ int drm_mode_vrefresh(const struct drm_display_mode *mode) { diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c index 5c2abc9eca9c..5530919e0ba0 100644 --- a/drivers/gpu/drm/drm_of.c +++ b/drivers/gpu/drm/drm_of.c @@ -564,6 +564,8 @@ EXPORT_SYMBOL_GPL(drm_of_get_data_lanes_count_ep); * Gets parent DSI bus for a DSI device controlled through a bus other * than MIPI-DCS (SPI, I2C, etc.) using the Device Tree. * + * This function assumes that the device's port@0 is the DSI input. + * * Returns pointer to mipi_dsi_host if successful, -EINVAL if the * request is unsupported, -EPROBE_DEFER if the DSI host is found but * not available, or -ENODEV otherwise. @@ -576,7 +578,7 @@ struct mipi_dsi_host *drm_of_get_dsi_bus(struct device *dev) /* * Get first endpoint child from device. 
*/ - endpoint = of_graph_get_next_endpoint(dev->of_node, NULL); + endpoint = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1); if (!endpoint) return ERR_PTR(-ENODEV); diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c index 19ab0a794add..2379e501c08b 100644 --- a/drivers/gpu/drm/drm_panel.c +++ b/drivers/gpu/drm/drm_panel.c @@ -24,6 +24,7 @@ #include <linux/backlight.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/of.h> #include <drm/drm_crtc.h> #include <drm/drm_panel.h> diff --git a/drivers/gpu/drm/drm_panel_backlight_quirks.c b/drivers/gpu/drm/drm_panel_backlight_quirks.c new file mode 100644 index 000000000000..c477d98ade2b --- /dev/null +++ b/drivers/gpu/drm/drm_panel_backlight_quirks.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/array_size.h> +#include <linux/dmi.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <drm/drm_edid.h> +#include <drm/drm_utils.h> + +struct drm_panel_min_backlight_quirk { + struct { + enum dmi_field field; + const char * const value; + } dmi_match; + struct drm_edid_ident ident; + u8 min_brightness; +}; + +static const struct drm_panel_min_backlight_quirk drm_panel_min_backlight_quirks[] = { + /* 13 inch matte panel */ + { + .dmi_match.field = DMI_BOARD_VENDOR, + .dmi_match.value = "Framework", + .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x0bca), + .ident.name = "NE135FBM-N41", + .min_brightness = 0, + }, + /* 13 inch glossy panel */ + { + .dmi_match.field = DMI_BOARD_VENDOR, + .dmi_match.value = "Framework", + .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x095f), + .ident.name = "NE135FBM-N41", + .min_brightness = 0, + }, + /* 13 inch 2.8k panel */ + { + .dmi_match.field = DMI_BOARD_VENDOR, + .dmi_match.value = "Framework", + .ident.panel_id = drm_edid_encode_panel_id('B', 'O', 'E', 0x0cb4), + .ident.name = "NE135A1M-NY1", + .min_brightness = 0, + }, +}; + +static bool drm_panel_min_backlight_quirk_matches(const struct drm_panel_min_backlight_quirk *quirk, + const struct drm_edid *edid) +{ + if (!dmi_match(quirk->dmi_match.field, quirk->dmi_match.value)) + return false; + + if (!drm_edid_match(edid, &quirk->ident)) + return false; + + return true; +} + +/** + * drm_get_panel_min_brightness_quirk - Get minimum supported brightness level for a panel. + * @edid: EDID of the panel to check + * + * This function checks for platform specific (e.g. DMI based) quirks + * providing info on the minimum backlight brightness for systems where this + * cannot be probed correctly from the hard-/firm-ware. + * + * Returns: + * A negative error value or + * an override value in the range [0, 255] representing 0-100% to be scaled to + * the drivers target range. 
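A hedged example of consuming this quirk from a backlight-capable driver; the scaling into a hypothetical bl_max range is illustrative only, while drm_get_panel_min_brightness_quirk() and its 0-255 / negative-errno contract come from the code above.

	#include <drm/drm_utils.h>

	/* "bl_max" is the driver's own maximum brightness level (hypothetical). */
	static u32 example_min_brightness(const struct drm_edid *edid, u32 bl_max)
	{
		int override = drm_get_panel_min_brightness_quirk(edid);

		if (override < 0)	/* -ENODATA / -EINVAL: keep the probed minimum */
			return 0;

		/* Scale the 0-255 (0-100%) override to the driver's range. */
		return DIV_ROUND_UP((u32)override * bl_max, 255);
	}
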
+ */ +int drm_get_panel_min_brightness_quirk(const struct drm_edid *edid) +{ + const struct drm_panel_min_backlight_quirk *quirk; + size_t i; + + if (!IS_ENABLED(CONFIG_DMI)) + return -ENODATA; + + if (!edid) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(drm_panel_min_backlight_quirks); i++) { + quirk = &drm_panel_min_backlight_quirks[i]; + + if (drm_panel_min_backlight_quirk_matches(quirk, edid)) + return quirk->min_brightness; + } + + return -ENODATA; +} +EXPORT_SYMBOL(drm_get_panel_min_brightness_quirk); + +MODULE_DESCRIPTION("Quirks for panel backlight overrides"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 0a9ecc1380d2..f128d345b16d 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -31,6 +31,7 @@ #include <drm/drm_rect.h> #include "drm_crtc_internal.h" +#include "drm_draw_internal.h" MODULE_AUTHOR("Jocelyn Falempe"); MODULE_DESCRIPTION("DRM panic handler"); @@ -139,181 +140,8 @@ device_initcall(drm_panic_setup_logo); #endif /* - * Color conversion + * Blit & Fill functions */ - -static u16 convert_xrgb8888_to_rgb565(u32 pix) -{ - return ((pix & 0x00F80000) >> 8) | - ((pix & 0x0000FC00) >> 5) | - ((pix & 0x000000F8) >> 3); -} - -static u16 convert_xrgb8888_to_rgba5551(u32 pix) -{ - return ((pix & 0x00f80000) >> 8) | - ((pix & 0x0000f800) >> 5) | - ((pix & 0x000000f8) >> 2) | - BIT(0); /* set alpha bit */ -} - -static u16 convert_xrgb8888_to_xrgb1555(u32 pix) -{ - return ((pix & 0x00f80000) >> 9) | - ((pix & 0x0000f800) >> 6) | - ((pix & 0x000000f8) >> 3); -} - -static u16 convert_xrgb8888_to_argb1555(u32 pix) -{ - return BIT(15) | /* set alpha bit */ - ((pix & 0x00f80000) >> 9) | - ((pix & 0x0000f800) >> 6) | - ((pix & 0x000000f8) >> 3); -} - -static u32 convert_xrgb8888_to_argb8888(u32 pix) -{ - return pix | GENMASK(31, 24); /* fill alpha bits */ -} - -static u32 convert_xrgb8888_to_xbgr8888(u32 pix) -{ - return ((pix & 0x00ff0000) >> 16) << 0 | - ((pix & 0x0000ff00) >> 8) << 8 | - ((pix & 0x000000ff) >> 0) << 16 | - ((pix & 0xff000000) >> 24) << 24; -} - -static u32 convert_xrgb8888_to_abgr8888(u32 pix) -{ - return ((pix & 0x00ff0000) >> 16) << 0 | - ((pix & 0x0000ff00) >> 8) << 8 | - ((pix & 0x000000ff) >> 0) << 16 | - GENMASK(31, 24); /* fill alpha bits */ -} - -static u32 convert_xrgb8888_to_xrgb2101010(u32 pix) -{ - pix = ((pix & 0x000000FF) << 2) | - ((pix & 0x0000FF00) << 4) | - ((pix & 0x00FF0000) << 6); - return pix | ((pix >> 8) & 0x00300C03); -} - -static u32 convert_xrgb8888_to_argb2101010(u32 pix) -{ - pix = ((pix & 0x000000FF) << 2) | - ((pix & 0x0000FF00) << 4) | - ((pix & 0x00FF0000) << 6); - return GENMASK(31, 30) /* set alpha bits */ | pix | ((pix >> 8) & 0x00300C03); -} - -static u32 convert_xrgb8888_to_abgr2101010(u32 pix) -{ - pix = ((pix & 0x00FF0000) >> 14) | - ((pix & 0x0000FF00) << 4) | - ((pix & 0x000000FF) << 22); - return GENMASK(31, 30) /* set alpha bits */ | pix | ((pix >> 8) & 0x00300C03); -} - -/* - * convert_from_xrgb8888 - convert one pixel from xrgb8888 to the desired format - * @color: input color, in xrgb8888 format - * @format: output format - * - * Returns: - * Color in the format specified, casted to u32. - * Or 0 if the format is not supported. 
- */ -static u32 convert_from_xrgb8888(u32 color, u32 format) -{ - switch (format) { - case DRM_FORMAT_RGB565: - return convert_xrgb8888_to_rgb565(color); - case DRM_FORMAT_RGBA5551: - return convert_xrgb8888_to_rgba5551(color); - case DRM_FORMAT_XRGB1555: - return convert_xrgb8888_to_xrgb1555(color); - case DRM_FORMAT_ARGB1555: - return convert_xrgb8888_to_argb1555(color); - case DRM_FORMAT_RGB888: - case DRM_FORMAT_XRGB8888: - return color; - case DRM_FORMAT_ARGB8888: - return convert_xrgb8888_to_argb8888(color); - case DRM_FORMAT_XBGR8888: - return convert_xrgb8888_to_xbgr8888(color); - case DRM_FORMAT_ABGR8888: - return convert_xrgb8888_to_abgr8888(color); - case DRM_FORMAT_XRGB2101010: - return convert_xrgb8888_to_xrgb2101010(color); - case DRM_FORMAT_ARGB2101010: - return convert_xrgb8888_to_argb2101010(color); - case DRM_FORMAT_ABGR2101010: - return convert_xrgb8888_to_abgr2101010(color); - default: - WARN_ONCE(1, "Can't convert to %p4cc\n", &format); - return 0; - } -} - -/* - * Blit & Fill - */ -/* check if the pixel at coord x,y is 1 (foreground) or 0 (background) */ -static bool drm_panic_is_pixel_fg(const u8 *sbuf8, unsigned int spitch, int x, int y) -{ - return (sbuf8[(y * spitch) + x / 8] & (0x80 >> (x % 8))) != 0; -} - -static void drm_panic_blit16(struct iosys_map *dmap, unsigned int dpitch, - const u8 *sbuf8, unsigned int spitch, - unsigned int height, unsigned int width, - unsigned int scale, u16 fg16) -{ - unsigned int y, x; - - for (y = 0; y < height; y++) - for (x = 0; x < width; x++) - if (drm_panic_is_pixel_fg(sbuf8, spitch, x / scale, y / scale)) - iosys_map_wr(dmap, y * dpitch + x * sizeof(u16), u16, fg16); -} - -static void drm_panic_blit24(struct iosys_map *dmap, unsigned int dpitch, - const u8 *sbuf8, unsigned int spitch, - unsigned int height, unsigned int width, - unsigned int scale, u32 fg32) -{ - unsigned int y, x; - - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - u32 off = y * dpitch + x * 3; - - if (drm_panic_is_pixel_fg(sbuf8, spitch, x / scale, y / scale)) { - /* write blue-green-red to output in little endianness */ - iosys_map_wr(dmap, off, u8, (fg32 & 0x000000FF) >> 0); - iosys_map_wr(dmap, off + 1, u8, (fg32 & 0x0000FF00) >> 8); - iosys_map_wr(dmap, off + 2, u8, (fg32 & 0x00FF0000) >> 16); - } - } - } -} - -static void drm_panic_blit32(struct iosys_map *dmap, unsigned int dpitch, - const u8 *sbuf8, unsigned int spitch, - unsigned int height, unsigned int width, - unsigned int scale, u32 fg32) -{ - unsigned int y, x; - - for (y = 0; y < height; y++) - for (x = 0; x < width; x++) - if (drm_panic_is_pixel_fg(sbuf8, spitch, x / scale, y / scale)) - iosys_map_wr(dmap, y * dpitch + x * sizeof(u32), u32, fg32); -} - static void drm_panic_blit_pixel(struct drm_scanout_buffer *sb, struct drm_rect *clip, const u8 *sbuf8, unsigned int spitch, unsigned int scale, u32 fg_color) @@ -322,7 +150,7 @@ static void drm_panic_blit_pixel(struct drm_scanout_buffer *sb, struct drm_rect for (y = 0; y < drm_rect_height(clip); y++) for (x = 0; x < drm_rect_width(clip); x++) - if (drm_panic_is_pixel_fg(sbuf8, spitch, x / scale, y / scale)) + if (drm_draw_is_pixel_fg(sbuf8, spitch, x / scale, y / scale)) sb->set_pixel(sb, clip->x1 + x, clip->y1 + y, fg_color); } @@ -354,62 +182,22 @@ static void drm_panic_blit(struct drm_scanout_buffer *sb, struct drm_rect *clip, switch (sb->format->cpp[0]) { case 2: - drm_panic_blit16(&map, sb->pitch[0], sbuf8, spitch, - drm_rect_height(clip), drm_rect_width(clip), scale, fg_color); + drm_draw_blit16(&map, sb->pitch[0], 
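/*
 * Editorial worked example (not part of the patch): the 1-bit-per-pixel glyph
 * addressing used by drm_draw_is_pixel_fg() (formerly drm_panic_is_pixel_fg()
 * above).  Pixels are stored MSB first, so with a source pitch of 2 bytes,
 * pixel (x = 10, y = 3) is tested as:
 *
 *	byte = sbuf8[3 * 2 + 10 / 8];		// sbuf8[7]
 *	fg   = byte & (0x80 >> (10 % 8));	// byte & 0x20
 *
 * The blit helpers divide x and y by "scale" before this lookup, which is how
 * the panic screen magnifies the font and logo bitmaps.
 */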
sbuf8, spitch, + drm_rect_height(clip), drm_rect_width(clip), scale, fg_color); break; case 3: - drm_panic_blit24(&map, sb->pitch[0], sbuf8, spitch, - drm_rect_height(clip), drm_rect_width(clip), scale, fg_color); + drm_draw_blit24(&map, sb->pitch[0], sbuf8, spitch, + drm_rect_height(clip), drm_rect_width(clip), scale, fg_color); break; case 4: - drm_panic_blit32(&map, sb->pitch[0], sbuf8, spitch, - drm_rect_height(clip), drm_rect_width(clip), scale, fg_color); + drm_draw_blit32(&map, sb->pitch[0], sbuf8, spitch, + drm_rect_height(clip), drm_rect_width(clip), scale, fg_color); break; default: WARN_ONCE(1, "Can't blit with pixel width %d\n", sb->format->cpp[0]); } } -static void drm_panic_fill16(struct iosys_map *dmap, unsigned int dpitch, - unsigned int height, unsigned int width, - u16 color) -{ - unsigned int y, x; - - for (y = 0; y < height; y++) - for (x = 0; x < width; x++) - iosys_map_wr(dmap, y * dpitch + x * sizeof(u16), u16, color); -} - -static void drm_panic_fill24(struct iosys_map *dmap, unsigned int dpitch, - unsigned int height, unsigned int width, - u32 color) -{ - unsigned int y, x; - - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - unsigned int off = y * dpitch + x * 3; - - /* write blue-green-red to output in little endianness */ - iosys_map_wr(dmap, off, u8, (color & 0x000000FF) >> 0); - iosys_map_wr(dmap, off + 1, u8, (color & 0x0000FF00) >> 8); - iosys_map_wr(dmap, off + 2, u8, (color & 0x00FF0000) >> 16); - } - } -} - -static void drm_panic_fill32(struct iosys_map *dmap, unsigned int dpitch, - unsigned int height, unsigned int width, - u32 color) -{ - unsigned int y, x; - - for (y = 0; y < height; y++) - for (x = 0; x < width; x++) - iosys_map_wr(dmap, y * dpitch + x * sizeof(u32), u32, color); -} - static void drm_panic_fill_pixel(struct drm_scanout_buffer *sb, struct drm_rect *clip, u32 color) @@ -442,27 +230,22 @@ static void drm_panic_fill(struct drm_scanout_buffer *sb, struct drm_rect *clip, switch (sb->format->cpp[0]) { case 2: - drm_panic_fill16(&map, sb->pitch[0], drm_rect_height(clip), - drm_rect_width(clip), color); + drm_draw_fill16(&map, sb->pitch[0], drm_rect_height(clip), + drm_rect_width(clip), color); break; case 3: - drm_panic_fill24(&map, sb->pitch[0], drm_rect_height(clip), - drm_rect_width(clip), color); + drm_draw_fill24(&map, sb->pitch[0], drm_rect_height(clip), + drm_rect_width(clip), color); break; case 4: - drm_panic_fill32(&map, sb->pitch[0], drm_rect_height(clip), - drm_rect_width(clip), color); + drm_draw_fill32(&map, sb->pitch[0], drm_rect_height(clip), + drm_rect_width(clip), color); break; default: WARN_ONCE(1, "Can't fill with pixel width %d\n", sb->format->cpp[0]); } } -static const u8 *get_char_bitmap(const struct font_desc *font, char c, size_t font_pitch) -{ - return font->data + (c * font->height) * font_pitch; -} - static unsigned int get_max_line_len(const struct drm_panic_line *lines, int len) { int i; @@ -501,7 +284,7 @@ static void draw_txt_rectangle(struct drm_scanout_buffer *sb, rec.x1 += (drm_rect_width(clip) - (line_len * font->width)) / 2; for (j = 0; j < line_len; j++) { - src = get_char_bitmap(font, msg[i].txt[j], font_pitch); + src = drm_draw_get_char_bitmap(font, msg[i].txt[j], font_pitch); rec.x2 = rec.x1 + font->width; drm_panic_blit(sb, &rec, src, font_pitch, 1, color); rec.x1 += font->width; @@ -533,8 +316,10 @@ static void drm_panic_logo_draw(struct drm_scanout_buffer *sb, struct drm_rect * static void draw_panic_static_user(struct drm_scanout_buffer *sb) { - u32 fg_color = 
convert_from_xrgb8888(CONFIG_DRM_PANIC_FOREGROUND_COLOR, sb->format->format); - u32 bg_color = convert_from_xrgb8888(CONFIG_DRM_PANIC_BACKGROUND_COLOR, sb->format->format); + u32 fg_color = drm_draw_color_from_xrgb8888(CONFIG_DRM_PANIC_FOREGROUND_COLOR, + sb->format->format); + u32 bg_color = drm_draw_color_from_xrgb8888(CONFIG_DRM_PANIC_BACKGROUND_COLOR, + sb->format->format); const struct font_desc *font = get_default_font(sb->width, sb->height, NULL, NULL); struct drm_rect r_screen, r_logo, r_msg; unsigned int msg_width, msg_height; @@ -600,8 +385,10 @@ static int draw_line_with_wrap(struct drm_scanout_buffer *sb, const struct font_ */ static void draw_panic_static_kmsg(struct drm_scanout_buffer *sb) { - u32 fg_color = convert_from_xrgb8888(CONFIG_DRM_PANIC_FOREGROUND_COLOR, sb->format->format); - u32 bg_color = convert_from_xrgb8888(CONFIG_DRM_PANIC_BACKGROUND_COLOR, sb->format->format); + u32 fg_color = drm_draw_color_from_xrgb8888(CONFIG_DRM_PANIC_FOREGROUND_COLOR, + sb->format->format); + u32 bg_color = drm_draw_color_from_xrgb8888(CONFIG_DRM_PANIC_BACKGROUND_COLOR, + sb->format->format); const struct font_desc *font = get_default_font(sb->width, sb->height, NULL, NULL); struct drm_rect r_screen = DRM_RECT_INIT(0, 0, sb->width, sb->height); struct kmsg_dump_iter iter; @@ -791,8 +578,10 @@ static int drm_panic_get_qr_code(u8 **qr_image) */ static int _draw_panic_static_qr_code(struct drm_scanout_buffer *sb) { - u32 fg_color = convert_from_xrgb8888(CONFIG_DRM_PANIC_FOREGROUND_COLOR, sb->format->format); - u32 bg_color = convert_from_xrgb8888(CONFIG_DRM_PANIC_BACKGROUND_COLOR, sb->format->format); + u32 fg_color = drm_draw_color_from_xrgb8888(CONFIG_DRM_PANIC_FOREGROUND_COLOR, + sb->format->format); + u32 bg_color = drm_draw_color_from_xrgb8888(CONFIG_DRM_PANIC_BACKGROUND_COLOR, + sb->format->format); const struct font_desc *font = get_default_font(sb->width, sb->height, NULL, NULL); struct drm_rect r_screen, r_logo, r_msg, r_qr, r_qr_canvas; unsigned int max_qr_size, scale; @@ -878,7 +667,7 @@ static bool drm_panic_is_format_supported(const struct drm_format_info *format) { if (format->num_planes != 1) return false; - return convert_from_xrgb8888(0xffffff, format->format) != 0; + return drm_draw_color_from_xrgb8888(0xffffff, format->format) != 0; } static void draw_panic_dispatch(struct drm_scanout_buffer *sb) diff --git a/drivers/gpu/drm/drm_panic_qr.rs b/drivers/gpu/drm/drm_panic_qr.rs index 09500cddc009..ef2d490965ba 100644 --- a/drivers/gpu/drm/drm_panic_qr.rs +++ b/drivers/gpu/drm/drm_panic_qr.rs @@ -929,7 +929,6 @@ impl QrImage<'_> { /// * `tmp` must be valid for reading and writing for `tmp_size` bytes. /// /// They must remain valid for the duration of the function call. 
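/*
 * Editorial worked example (not part of the patch): drm_panic_is_format_supported()
 * probes a format by converting plain white.  For DRM_FORMAT_RGB565 the
 * xrgb8888 conversion keeps the top 5/6/5 bits of each channel:
 *
 *	0x00ffffff -> (0x00f80000 >> 8) | (0x0000fc00 >> 5) | (0x000000f8 >> 3)
 *		    = 0xf800 | 0x07e0 | 0x001f = 0xffff
 *
 * which is non-zero, so the format is usable.  An unsupported fourcc converts
 * to 0, which is why the != 0 test above is sufficient to reject it.
 */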
- #[no_mangle] pub unsafe extern "C" fn drm_panic_qr_generate( url: *const i8, diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c index a46f9e4ac09a..00707001d112 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c @@ -503,7 +503,6 @@ static const struct drm_driver etnaviv_drm_driver = { .fops = &fops, .name = "etnaviv", .desc = "etnaviv DRM", - .date = "20151214", .major = 1, .minor = 4, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index 1c44f85c5f54..f313ae7bc3a3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -13,9 +13,9 @@ #include <linux/pm_runtime.h> #include <linux/uaccess.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_fourcc.h> @@ -35,7 +35,6 @@ #define DRIVER_NAME "exynos" #define DRIVER_DESC "Samsung SoC DRM" -#define DRIVER_DATE "20180330" /* * Interface history: @@ -118,7 +117,6 @@ static const struct drm_driver exynos_drm_driver = { .fops = &exynos_drm_driver_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, }; diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index 466a9e514aa1..7a57d4a23e41 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -1648,7 +1648,9 @@ static int hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, struct hdmi_context *hdata = dev_get_drvdata(dev); struct drm_connector *connector = &hdata->connector; + mutex_lock(&connector->eld_mutex); memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); + mutex_unlock(&connector->eld_mutex); return 0; } diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c index be1ab673e49e..03b076db9381 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c @@ -18,8 +18,8 @@ #include <linux/pm_runtime.h> #include <linux/regmap.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -174,7 +174,6 @@ static const struct drm_driver fsl_dcu_drm_driver = { .fops = &fsl_dcu_drm_fops, .name = "fsl-dcu-drm", .desc = "Freescale DCU DRM", - .date = "20160425", .major = 1, .minor = 1, }; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c index 2c2b92324a2e..c418e8496bdf 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c @@ -6,6 +6,7 @@ */ #include <linux/backlight.h> +#include <linux/of.h> #include <linux/of_graph.h> #include <drm/drm_atomic_helper.h> diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index c419ebbc49ec..85d3557c2eb9 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -19,8 +19,8 @@ #include <acpi/video.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> @@ -513,7 +513,6 @@ static const struct drm_driver driver = { .fops = &psb_gem_fops, .name = DRIVER_NAME, .desc = 
DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h index de62cbfcdc72..7f77cb2b2751 100644 --- a/drivers/gpu/drm/gma500/psb_drv.h +++ b/drivers/gpu/drm/gma500/psb_drv.h @@ -26,7 +26,6 @@ #define DRIVER_NAME "gma500" #define DRIVER_DESC "DRM driver for the Intel GMA500, GMA600, GMA3600, GMA3650" -#define DRIVER_DATE "20140314" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c index 09ccdc1dc1a2..cb405771d6e2 100644 --- a/drivers/gpu/drm/gud/gud_drv.c +++ b/drivers/gpu/drm/gud/gud_drv.c @@ -13,9 +13,9 @@ #include <linux/vmalloc.h> #include <linux/workqueue.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> -#include <drm/drm_client_setup.h> #include <drm/drm_damage_helper.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> @@ -381,7 +381,6 @@ static const struct drm_driver gud_drm_driver = { .name = "gud", .desc = "Generic USB Display", - .date = "20200422", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/hisilicon/hibmc/Kconfig b/drivers/gpu/drm/hisilicon/hibmc/Kconfig index 80253d39664a..93b8d32e3be1 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/Kconfig +++ b/drivers/gpu/drm/hisilicon/hibmc/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_HISI_HIBMC tristate "DRM Support for Hisilicon Hibmc" - depends on DRM && PCI && (ARM64 || COMPILE_TEST) + depends on DRM && PCI depends on MMU select DRM_CLIENT_SELECTION select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 8c488c98ac97..7f814c32ed34 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -15,8 +15,8 @@ #include <linux/module.h> #include <linux/pci.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem_framebuffer_helper.h> @@ -57,7 +57,6 @@ static const struct drm_driver hibmc_driver = { .driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC, .fops = &hibmc_fops, .name = "hibmc", - .date = "20160828", .desc = "hibmc drm driver", .major = 1, .minor = 0, diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c index 5616c3917c03..2eb49177ac42 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c @@ -929,7 +929,6 @@ static const struct drm_driver ade_driver = { DRM_FBDEV_DMA_DRIVER_OPS, .name = "kirin", .desc = "Hisilicon Kirin620 SoC DRM Driver", - .date = "20150718", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c index b3ab944652a6..1e1c87be1204 100644 --- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c @@ -17,8 +17,8 @@ #include <linux/of_graph.h> #include <linux/platform_device.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_gem_dma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c 
b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index e0953777a206..f59abfa7622a 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -9,8 +9,8 @@ #include <linux/module.h> #include <linux/pci.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_gem_shmem_helper.h> @@ -20,7 +20,6 @@ #define DRIVER_NAME "hyperv_drm" #define DRIVER_DESC "DRM driver for Hyper-V synthetic video device" -#define DRIVER_DATE "2020" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -31,7 +30,6 @@ static struct drm_driver hyperv_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, diff --git a/drivers/gpu/drm/i2c/ch7006_drv.c b/drivers/gpu/drm/i2c/ch7006_drv.c index 131512a5f3bd..fcb0fcd6c897 100644 --- a/drivers/gpu/drm/i2c/ch7006_drv.c +++ b/drivers/gpu/drm/i2c/ch7006_drv.c @@ -486,7 +486,7 @@ static int ch7006_encoder_init(struct i2c_client *client, } static const struct i2c_device_id ch7006_ids[] = { - { "ch7006", 0 }, + { "ch7006" }, { } }; MODULE_DEVICE_TABLE(i2c, ch7006_ids); diff --git a/drivers/gpu/drm/i2c/sil164_drv.c b/drivers/gpu/drm/i2c/sil164_drv.c index ff23422727fc..c17afa025d9d 100644 --- a/drivers/gpu/drm/i2c/sil164_drv.c +++ b/drivers/gpu/drm/i2c/sil164_drv.c @@ -413,7 +413,7 @@ sil164_encoder_init(struct i2c_client *client, } static const struct i2c_device_id sil164_ids[] = { - { "sil164", 0 }, + { "sil164" }, { } }; MODULE_DEVICE_TABLE(i2c, sil164_ids); diff --git a/drivers/gpu/drm/i2c/tda9950.c b/drivers/gpu/drm/i2c/tda9950.c index 82d618c40dce..cbff851e0c85 100644 --- a/drivers/gpu/drm/i2c/tda9950.c +++ b/drivers/gpu/drm/i2c/tda9950.c @@ -486,8 +486,8 @@ static void tda9950_remove(struct i2c_client *client) } static struct i2c_device_id tda9950_ids[] = { - { "tda9950", 0 }, - { }, + { "tda9950" }, + { } }; MODULE_DEVICE_TABLE(i2c, tda9950_ids); diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index 2160f05bbd16..57ce77c2be24 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -2094,7 +2094,7 @@ MODULE_DEVICE_TABLE(of, tda998x_dt_ids); #endif static const struct i2c_device_id tda998x_ids[] = { - { "tda998x", 0 }, + { "tda998x" }, { } }; MODULE_DEVICE_TABLE(i2c, tda998x_ids); diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index c6b251f178c2..ce8a4319a63c 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -698,10 +698,12 @@ bool intel_audio_compute_config(struct intel_encoder *encoder, const struct drm_display_mode *adjusted_mode = &crtc_state->hw.adjusted_mode; + mutex_lock(&connector->eld_mutex); if (!connector->eld[0]) { drm_dbg_kms(&i915->drm, "Bogus ELD on [CONNECTOR:%d:%s]\n", connector->base.id, connector->name); + mutex_unlock(&connector->eld_mutex); return false; } @@ -709,6 +711,7 @@ bool intel_audio_compute_config(struct intel_encoder *encoder, memcpy(crtc_state->eld, connector->eld, sizeof(crtc_state->eld)); crtc_state->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2; + mutex_unlock(&connector->eld_mutex); return true; } diff --git a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h index f0e5c196eae4..c685479c9756 100644 --- a/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h +++ 
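/*
 * Editorial sketch (not part of the patch): the connector->eld_mutex locking
 * pattern applied in the exynos and i915 audio hunks above.  The readers
 * converted here hold the mutex for the duration of the ELD copy; buf and len
 * are the usual get_eld() arguments.
 */
mutex_lock(&connector->eld_mutex);
memcpy(buf, connector->eld, min(sizeof(connector->eld), len));
mutex_unlock(&connector->eld_mutex);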
b/drivers/gpu/drm/i915/display/intel_cx0_phy_regs.h @@ -200,6 +200,13 @@ #define XELPDP_SSC_ENABLE_PLLA REG_BIT(1) #define XELPDP_SSC_ENABLE_PLLB REG_BIT(0) +#define TCSS_DISP_MAILBOX_IN_CMD _MMIO(0x161300) +#define TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY REG_BIT(31) +#define TCSS_DISP_MAILBOX_IN_CMD_CMD_MASK REG_GENMASK(7, 0) +#define TCSS_DISP_MAILBOX_IN_CMD_DATA(val) REG_FIELD_PREP(TCSS_DISP_MAILBOX_IN_CMD_CMD_MASK, val) + +#define TCSS_DISP_MAILBOX_IN_DATA _MMIO(0x161304) + /* C10 Vendor Registers */ #define PHY_C10_VDR_PLL(idx) (0xC00 + (idx)) #define C10_PLL0_FRACEN REG_BIT8(4) diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index 123c4ece6268..debe4d0eee11 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1731,6 +1731,8 @@ mst_topology_add_connector(struct drm_dp_mst_topology_mgr *mgr, if (!intel_connector) return NULL; + connector = &intel_connector->base; + intel_connector->get_hw_state = mst_connector_get_hw_state; intel_connector->sync_state = intel_dp_connector_sync_state; intel_connector->mst_port = intel_dp; @@ -1739,30 +1741,19 @@ mst_topology_add_connector(struct drm_dp_mst_topology_mgr *mgr, intel_dp_init_modeset_retry_work(intel_connector); - /* - * TODO: The following drm_connector specific initialization belongs - * to DRM core, however it happens atm too late in - * drm_connector_init(). That function will also expose the connector - * to in-kernel users, so it can't be called until the connector is - * sufficiently initialized; init the device pointer used by the - * following DSC setup, until a fix moving this to DRM core. - */ - intel_connector->base.dev = mgr->dev; - - intel_connector->dp.dsc_decompression_aux = drm_dp_mst_dsc_aux_for_port(port); - intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector); - intel_connector->dp.dsc_hblank_expansion_quirk = - detect_dsc_hblank_expansion_quirk(intel_connector); - - connector = &intel_connector->base; - ret = drm_connector_init(display->drm, connector, &mst_connector_funcs, - DRM_MODE_CONNECTOR_DisplayPort); + ret = drm_connector_dynamic_init(display->drm, connector, &mst_connector_funcs, + DRM_MODE_CONNECTOR_DisplayPort, NULL); if (ret) { drm_dp_mst_put_port_malloc(port); intel_connector_free(intel_connector); return NULL; } + intel_connector->dp.dsc_decompression_aux = drm_dp_mst_dsc_aux_for_port(port); + intel_dp_mst_read_decompression_port_dsc_caps(intel_dp, intel_connector); + intel_connector->dp.dsc_hblank_expansion_quirk = + detect_dsc_hblank_expansion_quirk(intel_connector); + drm_connector_helper_add(connector, &mst_connector_helper_funcs); for_each_pipe(display, pipe) { diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 0e4d78b146f6..13811244c82b 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -1013,21 +1013,52 @@ xelpdp_tc_phy_wait_for_tcss_power(struct intel_tc_port *tc, bool enabled) return true; } +/* + * Gfx driver WA 14020908590 for PTL tcss_rxdetect_clkswb_req/ack + * handshake violation when pwwreq= 0->1 during TC7/10 entry + */ +static void xelpdp_tc_power_request_wa(struct intel_display *display, bool enable) +{ + /* check if mailbox is running busy */ + if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD, + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { + drm_dbg_kms(display->drm, + "Timeout waiting for TCSS mailbox run/busy bit to clear\n"); + return; + } + + 
intel_de_write(display, TCSS_DISP_MAILBOX_IN_DATA, enable ? 1 : 0); + intel_de_write(display, TCSS_DISP_MAILBOX_IN_CMD, + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY | + TCSS_DISP_MAILBOX_IN_CMD_DATA(0x1)); + + /* wait to clear mailbox running busy bit before continuing */ + if (intel_de_wait_for_clear(display, TCSS_DISP_MAILBOX_IN_CMD, + TCSS_DISP_MAILBOX_IN_CMD_RUN_BUSY, 10)) { + drm_dbg_kms(display->drm, + "Timeout after writing data to mailbox. Mailbox run/busy bit did not clear\n"); + return; + } +} + static void __xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enable) { - struct drm_i915_private *i915 = tc_to_i915(tc); + struct intel_display *display = to_intel_display(tc->dig_port); enum port port = tc->dig_port->base.port; - i915_reg_t reg = XELPDP_PORT_BUF_CTL1(i915, port); + i915_reg_t reg = XELPDP_PORT_BUF_CTL1(display, port); u32 val; assert_tc_cold_blocked(tc); - val = intel_de_read(i915, reg); + if (DISPLAY_VER(display) == 30) + xelpdp_tc_power_request_wa(display, enable); + + val = intel_de_read(display, reg); if (enable) val |= XELPDP_TCSS_POWER_REQUEST; else val &= ~XELPDP_TCSS_POWER_REQUEST; - intel_de_write(i915, reg, val); + intel_de_write(display, reg, val); } static bool xelpdp_tc_phy_enable_tcss_power(struct intel_tc_port *tc, bool enable) diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 40269e4c1e31..325da0414d94 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -126,9 +126,6 @@ execlists_active(const struct intel_engine_execlists *execlists) return active; } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists); - static inline u32 intel_read_status_page(const struct intel_engine_cs *engine, int reg) { diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ba55c059063d..fe1f85e5dda3 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -343,6 +343,11 @@ struct intel_engine_guc_stats { * @start_gt_clk: GT clock time of last idle to active transition. 
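/*
 * Editorial summary (not part of the patch) of the mailbox handshake used by
 * xelpdp_tc_power_request_wa() above for WA 14020908590:
 *
 *	1. poll TCSS_DISP_MAILBOX_IN_CMD until RUN_BUSY is clear (mailbox idle)
 *	2. write the payload (1 = enable, 0 = disable) to TCSS_DISP_MAILBOX_IN_DATA
 *	3. write command 0x1 with RUN_BUSY set to TCSS_DISP_MAILBOX_IN_CMD
 *	4. poll RUN_BUSY again before the caller flips XELPDP_TCSS_POWER_REQUEST
 *
 * Either wait timing out is only logged.  The workaround is applied only when
 * DISPLAY_VER(display) == 30.
 */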
*/ u64 start_gt_clk; + + /** + * @total: The last value of total returned + */ + u64 total; }; union intel_engine_tlb_inv_reg { diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c index 72090f52fb85..4a80ffa1b962 100644 --- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -405,15 +405,6 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine) return active; } -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) -{ - struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - - return __unwind_incomplete_requests(engine); -} - static void execlists_context_status_change(struct i915_request *rq, unsigned long status) { diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index dcef8d498919..c207a4fb03bf 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -1125,6 +1125,7 @@ static u64 measure_power(struct intel_rps *rps, int *freq) static u64 measure_power_at(struct intel_rps *rps, int *freq) { *freq = rps_set_check(rps, *freq); + msleep(100); return measure_power(rps, freq); } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index 9ede6f240d79..12f1ba7ca9c1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1243,6 +1243,21 @@ static void __get_engine_usage_record(struct intel_engine_cs *engine, } while (++i < 6); } +static void __set_engine_usage_record(struct intel_engine_cs *engine, + u32 last_in, u32 id, u32 total) +{ + struct iosys_map rec_map = intel_guc_engine_usage_record_map(engine); + +#define record_write(map_, field_, val_) \ + iosys_map_wr_field(map_, 0, struct guc_engine_usage_record, field_, val_) + + record_write(&rec_map, last_switch_in_stamp, last_in); + record_write(&rec_map, current_context_index, id); + record_write(&rec_map, total_runtime, total); + +#undef record_write +} + static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) { struct intel_engine_guc_stats *stats = &engine->stats.guc; @@ -1363,9 +1378,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) total += intel_gt_clock_interval_to_ns(gt, clk); } + if (total > stats->total) + stats->total = total; + spin_unlock_irqrestore(&guc->timestamp.lock, flags); - return ns_to_ktime(total); + return ns_to_ktime(stats->total); } static void guc_enable_busyness_worker(struct intel_guc *guc) @@ -1431,8 +1449,21 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) guc_update_pm_timestamp(guc, &unused); for_each_engine(engine, gt, id) { + struct intel_engine_guc_stats *stats = &engine->stats.guc; + guc_update_engine_gt_clks(engine); - engine->stats.guc.prev_total = 0; + + /* + * If resetting a running context, accumulate the active + * time as well since there will be no context switch. 
+ */ + if (stats->running) { + u64 clk = guc->timestamp.gt_stamp - stats->start_gt_clk; + + stats->total_gt_clks += clk; + } + stats->prev_total = 0; + stats->running = 0; } spin_unlock_irqrestore(&guc->timestamp.lock, flags); @@ -1543,6 +1574,9 @@ err_trylock: static int guc_action_enable_usage_stats(struct intel_guc *guc) { + struct intel_gt *gt = guc_to_gt(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; u32 offset = intel_guc_engine_usage_offset(guc); u32 action[] = { INTEL_GUC_ACTION_SET_ENG_UTIL_BUFF, @@ -1550,6 +1584,9 @@ static int guc_action_enable_usage_stats(struct intel_guc *guc) 0, }; + for_each_engine(engine, gt, id) + __set_engine_usage_record(engine, 0, 0xffffffff, 0); + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } @@ -1688,6 +1725,10 @@ void intel_guc_submission_reset_prepare(struct intel_guc *guc) spin_lock_irq(guc_to_gt(guc)->irq_lock); spin_unlock_irq(guc_to_gt(guc)->irq_lock); + /* Flush tasklet */ + tasklet_disable(&guc->ct.receive_tasklet); + tasklet_enable(&guc->ct.receive_tasklet); + guc_flush_submissions(guc); guc_flush_destroyed_contexts(guc); flush_work(&guc->ct.requests.worker); @@ -2005,6 +2046,8 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc) void intel_guc_submission_reset_finish(struct intel_guc *guc) { + int outstanding; + /* Reset called during driver load or during wedge? */ if (unlikely(!guc_submission_initialized(guc) || !intel_guc_is_fw_running(guc) || @@ -2018,8 +2061,10 @@ void intel_guc_submission_reset_finish(struct intel_guc *guc) * see in CI if this happens frequently / a precursor to taking down the * machine. */ - if (atomic_read(&guc->outstanding_submission_g2h)) - guc_err(guc, "Unexpected outstanding GuC to Host in reset finish\n"); + outstanding = atomic_read(&guc->outstanding_submission_g2h); + if (outstanding) + guc_err(guc, "Unexpected outstanding GuC to Host response(s) in reset finish: %d\n", + outstanding); atomic_set(&guc->outstanding_submission_g2h, 0); intel_guc_global_policies_update(guc); diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index bcf854dc93b4..eb3fcc9e77a5 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -1802,7 +1802,6 @@ static const struct drm_driver i915_drm_driver = { .fops = &i915_driver_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/i915/i915_driver.h b/drivers/gpu/drm/i915/i915_driver.h index 94a70d8ec5d5..4b67ad9a61cd 100644 --- a/drivers/gpu/drm/i915/i915_driver.h +++ b/drivers/gpu/drm/i915/i915_driver.h @@ -15,7 +15,6 @@ struct drm_printer; #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20230929" #define DRIVER_TIMESTAMP 1695980603 extern const struct dev_pm_ops i915_pm_ops; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 71c0daef1996..819ab933bb10 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -841,7 +841,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, err_printf(m, "Kernel: %s %s\n", init_utsname()->release, init_utsname()->machine); - err_printf(m, "Driver: %s\n", DRIVER_DATE); ts = ktime_to_timespec64(error->time); err_printf(m, "Time: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c index 
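/*
 * Editorial note (not part of the patch): together with the new stats->total
 * field, guc_engine_busyness() now effectively returns
 *
 *	if (total > stats->total)
 *		stats->total = total;
 *	return ns_to_ktime(stats->total);
 *
 * i.e. a monotonically non-decreasing value, and __reset_guc_busyness_stats()
 * folds a still-running context's active time into total_gt_clks, so a GT/GuC
 * reset can no longer make the PMU busyness counter appear to go backwards.
 */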
f5c97a620962..76e2801619f0 100644 --- a/drivers/gpu/drm/i915/i915_mm.c +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -143,8 +143,8 @@ int remap_io_sg(struct vm_area_struct *vma, /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ GEM_BUG_ON((vma->vm_flags & EXPECTED_FLAGS) != EXPECTED_FLAGS); - while (offset >= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT) { - offset -= sg_dma_len(r.sgt.sgp) >> PAGE_SHIFT; + while (offset >= r.sgt.max >> PAGE_SHIFT) { + offset -= r.sgt.max >> PAGE_SHIFT; r.sgt = __sgt_iter(__sg_next(r.sgt.sgp), use_dma(iobase)); if (!r.sgt.sgp) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index 93fbf53578da..e55db036be1b 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -302,7 +302,7 @@ void i915_pmu_gt_parked(struct intel_gt *gt) { struct i915_pmu *pmu = >->i915->pmu; - if (!pmu->base.event_init) + if (!pmu->registered) return; spin_lock_irq(&pmu->lock); @@ -324,7 +324,7 @@ void i915_pmu_gt_unparked(struct intel_gt *gt) { struct i915_pmu *pmu = >->i915->pmu; - if (!pmu->base.event_init) + if (!pmu->registered) return; spin_lock_irq(&pmu->lock); @@ -626,7 +626,7 @@ static int i915_pmu_event_init(struct perf_event *event) struct drm_i915_private *i915 = pmu_to_i915(pmu); int ret; - if (pmu->closed) + if (!pmu->registered) return -ENODEV; if (event->attr.type != event->pmu->type) @@ -724,7 +724,7 @@ static void i915_pmu_event_read(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 prev, new; - if (pmu->closed) { + if (!pmu->registered) { event->hw.state = PERF_HES_STOPPED; return; } @@ -850,7 +850,7 @@ static void i915_pmu_event_start(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) return; i915_pmu_enable(event); @@ -861,7 +861,7 @@ static void i915_pmu_event_stop(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) goto out; if (flags & PERF_EF_UPDATE) @@ -877,7 +877,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags) { struct i915_pmu *pmu = event_to_pmu(event); - if (pmu->closed) + if (!pmu->registered) return -ENODEV; if (flags & PERF_EF_START) @@ -1177,8 +1177,6 @@ static int i915_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) { struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); - GEM_BUG_ON(!pmu->base.event_init); - /* Select the first online CPU as a designated reader. */ if (cpumask_empty(&i915_pmu_cpumask)) cpumask_set_cpu(cpu, &i915_pmu_cpumask); @@ -1191,13 +1189,11 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) struct i915_pmu *pmu = hlist_entry_safe(node, typeof(*pmu), cpuhp.node); unsigned int target = i915_pmu_target_cpu; - GEM_BUG_ON(!pmu->base.event_init); - /* * Unregistering an instance generates a CPU offline event which we must * ignore to avoid incorrectly modifying the shared i915_pmu_cpumask. */ - if (pmu->closed) + if (!pmu->registered) return 0; if (cpumask_test_and_clear_cpu(cpu, &i915_pmu_cpumask)) { @@ -1218,7 +1214,7 @@ static int i915_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) return 0; } -static enum cpuhp_state cpuhp_slot = CPUHP_INVALID; +static enum cpuhp_state cpuhp_state = CPUHP_INVALID; int i915_pmu_init(void) { @@ -1232,28 +1228,28 @@ int i915_pmu_init(void) pr_notice("Failed to setup cpuhp state for i915 PMU! 
(%d)\n", ret); else - cpuhp_slot = ret; + cpuhp_state = ret; return 0; } void i915_pmu_exit(void) { - if (cpuhp_slot != CPUHP_INVALID) - cpuhp_remove_multi_state(cpuhp_slot); + if (cpuhp_state != CPUHP_INVALID) + cpuhp_remove_multi_state(cpuhp_state); } static int i915_pmu_register_cpuhp_state(struct i915_pmu *pmu) { - if (cpuhp_slot == CPUHP_INVALID) + if (cpuhp_state == CPUHP_INVALID) return -EINVAL; - return cpuhp_state_add_instance(cpuhp_slot, &pmu->cpuhp.node); + return cpuhp_state_add_instance(cpuhp_state, &pmu->cpuhp.node); } static void i915_pmu_unregister_cpuhp_state(struct i915_pmu *pmu) { - cpuhp_state_remove_instance(cpuhp_slot, &pmu->cpuhp.node); + cpuhp_state_remove_instance(cpuhp_state, &pmu->cpuhp.node); } void i915_pmu_register(struct drm_i915_private *i915) @@ -1265,7 +1261,6 @@ void i915_pmu_register(struct drm_i915_private *i915) &i915_pmu_cpumask_attr_group, NULL }; - int ret = -ENOMEM; spin_lock_init(&pmu->lock); @@ -1316,6 +1311,8 @@ void i915_pmu_register(struct drm_i915_private *i915) if (ret) goto err_unreg; + pmu->registered = true; + return; err_unreg: @@ -1323,7 +1320,6 @@ err_unreg: err_groups: kfree(pmu->base.attr_groups); err_attr: - pmu->base.event_init = NULL; free_event_attributes(pmu); err_name: if (IS_DGFX(i915)) @@ -1336,23 +1332,17 @@ void i915_pmu_unregister(struct drm_i915_private *i915) { struct i915_pmu *pmu = &i915->pmu; - if (!pmu->base.event_init) + if (!pmu->registered) return; - /* - * "Disconnect" the PMU callbacks - since all are atomic synchronize_rcu - * ensures all currently executing ones will have exited before we - * proceed with unregistration. - */ - pmu->closed = true; - synchronize_rcu(); + /* Disconnect the PMU callbacks */ + pmu->registered = false; hrtimer_cancel(&pmu->timer); i915_pmu_unregister_cpuhp_state(pmu); perf_pmu_unregister(&pmu->base); - pmu->base.event_init = NULL; kfree(pmu->base.attr_groups); if (IS_DGFX(i915)) kfree(pmu->name); diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 41af038c3738..8e66d63d0c9f 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -68,9 +68,9 @@ struct i915_pmu { */ struct pmu base; /** - * @closed: i915 is unregistering. + * @registered: PMU is registered and not in the unregistering process. */ - bool closed; + bool registered; /** * @name: Name as registered with perf core. */ diff --git a/drivers/gpu/drm/imagination/pvr_drv.c b/drivers/gpu/drm/imagination/pvr_drv.c index 85ee9abd1811..0639502137b4 100644 --- a/drivers/gpu/drm/imagination/pvr_drv.c +++ b/drivers/gpu/drm/imagination/pvr_drv.c @@ -1387,7 +1387,6 @@ static struct drm_driver pvr_drm_driver = { .name = PVR_DRIVER_NAME, .desc = PVR_DRIVER_DESC, - .date = PVR_DRIVER_DATE, .major = PVR_DRIVER_MAJOR, .minor = PVR_DRIVER_MINOR, .patchlevel = PVR_DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/imagination/pvr_drv.h b/drivers/gpu/drm/imagination/pvr_drv.h index 378fe477b759..7fa147312dd1 100644 --- a/drivers/gpu/drm/imagination/pvr_drv.h +++ b/drivers/gpu/drm/imagination/pvr_drv.h @@ -9,7 +9,6 @@ #define PVR_DRIVER_NAME "powervr" #define PVR_DRIVER_DESC "Imagination PowerVR (Series 6 and later) & IMG Graphics" -#define PVR_DRIVER_DATE "20230904" /* * Driver interface version: diff --git a/drivers/gpu/drm/imx/dcss/dcss-kms.c b/drivers/gpu/drm/imx/dcss/dcss-kms.c index 63a335c62296..3633e8f3aff6 100644 --- a/drivers/gpu/drm/imx/dcss/dcss-kms.c +++ b/drivers/gpu/drm/imx/dcss/dcss-kms.c @@ -3,11 +3,11 @@ * Copyright 2019 NXP. 
*/ +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> #include <drm/drm_bridge_connector.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -34,7 +34,6 @@ static const struct drm_driver dcss_kms_driver = { .fops = &dcss_cma_fops, .name = "imx-dcss", .desc = "i.MX8MQ Display Subsystem", - .date = "20190917", .major = 1, .minor = 0, .patchlevel = 0, diff --git a/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c b/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c index 5f2c93c3c288..ec5fd9a01f1e 100644 --- a/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c +++ b/drivers/gpu/drm/imx/ipuv3/imx-drm-core.c @@ -13,9 +13,9 @@ #include <video/imx-ipu-v3.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -163,7 +163,6 @@ static const struct drm_driver imx_drm_driver = { .fops = &imx_drm_driver_fops, .name = "imx-drm", .desc = "i.MX DRM graphics", - .date = "20120507", .major = 1, .minor = 0, .patchlevel = 0, diff --git a/drivers/gpu/drm/imx/lcdc/imx-lcdc.c b/drivers/gpu/drm/imx/lcdc/imx-lcdc.c index fa7d44623c52..8d6a0bb31c48 100644 --- a/drivers/gpu/drm/imx/lcdc/imx-lcdc.c +++ b/drivers/gpu/drm/imx/lcdc/imx-lcdc.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only // SPDX-FileCopyrightText: 2020 Marian Cichy <M.Cichy@pengutronix.de> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_bridge.h> #include <drm/drm_bridge_connector.h> -#include <drm/drm_client_setup.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> @@ -352,7 +352,6 @@ static struct drm_driver imx_lcdc_drm_driver = { DRM_FBDEV_DMA_DRIVER_OPS, .name = "imx-lcdc", .desc = "i.MX LCDC driver", - .date = "20200716", }; static const struct of_device_id imx_lcdc_of_dev_id[] = { diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index 8469e1e5e582..c23ee2d214de 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -20,11 +20,11 @@ #include <linux/pm.h> #include <linux/regmap.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> #include <drm/drm_bridge_connector.h> -#include <drm/drm_client_setup.h> #include <drm/drm_color_mgmt.h> #include <drm/drm_crtc.h> #include <drm/drm_damage_helper.h> @@ -953,7 +953,6 @@ static const struct drm_driver ingenic_drm_driver_data = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, .name = "ingenic-drm", .desc = "DRM module for Ingenic SoCs", - .date = "20200716", .major = 1, .minor = 1, .patchlevel = 0, diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c index a3d31de761cb..32cda134ae3e 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.c +++ b/drivers/gpu/drm/kmb/kmb_drv.c @@ -13,8 +13,8 @@ #include <linux/pm_runtime.h> #include <linux/regmap.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -445,7 +445,6 @@ static const struct drm_driver kmb_driver = { DRM_FBDEV_DMA_DRIVER_OPS, .name = "kmb-drm", .desc = "KEEMBAY DISPLAY DRIVER", - .date = DRIVER_DATE, .major = 
DRIVER_MAJOR, .minor = DRIVER_MINOR, }; diff --git a/drivers/gpu/drm/kmb/kmb_drv.h b/drivers/gpu/drm/kmb/kmb_drv.h index bf085e95b28f..1f0c10d317fe 100644 --- a/drivers/gpu/drm/kmb/kmb_drv.h +++ b/drivers/gpu/drm/kmb/kmb_drv.h @@ -16,7 +16,6 @@ #define KMB_MIN_WIDTH 1920 /*Max width in pixels */ #define KMB_MIN_HEIGHT 1080 /*Max height in pixels */ -#define DRIVER_DATE "20210223" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 1 diff --git a/drivers/gpu/drm/lima/lima_drv.c b/drivers/gpu/drm/lima/lima_drv.c index fb3062c872b3..2067c5b65c57 100644 --- a/drivers/gpu/drm/lima/lima_drv.c +++ b/drivers/gpu/drm/lima/lima_drv.c @@ -271,7 +271,6 @@ static const struct drm_driver lima_drm_driver = { .fops = &lima_drm_driver_fops, .name = "lima", .desc = "lima DRM", - .date = "20191231", .major = 1, .minor = 1, .patchlevel = 0, diff --git a/drivers/gpu/drm/logicvc/logicvc_drm.c b/drivers/gpu/drm/logicvc/logicvc_drm.c index fb9de5e0bc0e..204b0fee55d0 100644 --- a/drivers/gpu/drm/logicvc/logicvc_drm.c +++ b/drivers/gpu/drm/logicvc/logicvc_drm.c @@ -15,8 +15,8 @@ #include <linux/regmap.h> #include <linux/types.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_fourcc.h> @@ -52,7 +52,6 @@ static struct drm_driver logicvc_drm_driver = { .fops = &logicvc_drm_fops, .name = "logicvc-drm", .desc = "Xylon LogiCVC DRM driver", - .date = "20200403", .major = 1, .minor = 0, diff --git a/drivers/gpu/drm/loongson/lsdc_drv.c b/drivers/gpu/drm/loongson/lsdc_drv.c index b350bdcf1645..12193d2a301a 100644 --- a/drivers/gpu/drm/loongson/lsdc_drv.c +++ b/drivers/gpu/drm/loongson/lsdc_drv.c @@ -7,9 +7,9 @@ #include <linux/pci.h> #include <linux/vgaarb.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem_framebuffer_helper.h> @@ -26,7 +26,6 @@ #define DRIVER_AUTHOR "Sui Jingfeng <suijingfeng@loongson.cn>" #define DRIVER_NAME "loongson" #define DRIVER_DESC "drm driver for loongson graphics" -#define DRIVER_DATE "20220701" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 @@ -39,7 +38,6 @@ static const struct drm_driver lsdc_drm_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, @@ -232,9 +230,9 @@ lsdc_create_device(struct pci_dev *pdev, lsdc_gem_init(ddev); /* Bar 0 of the DC device contains the MMIO register's base address */ - ldev->reg_base = pcim_iomap(pdev, 0, 0); - if (!ldev->reg_base) - return ERR_PTR(-ENODEV); + ldev->reg_base = pcim_iomap_region(pdev, 0, "lsdc"); + if (IS_ERR(ldev->reg_base)) + return ldev->reg_base; spin_lock_init(&ldev->reglock); diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c index c4d51f5f038d..5f2c462bad7e 100644 --- a/drivers/gpu/drm/mcde/mcde_drv.c +++ b/drivers/gpu/drm/mcde/mcde_drv.c @@ -65,9 +65,9 @@ #include <linux/slab.h> #include <linux/delay.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fb_dma_helper.h> #include <drm/drm_fbdev_dma.h> @@ -208,7 +208,6 @@ static const struct drm_driver mcde_drm_driver = { .fops = &drm_fops, .name = "mcde", .desc = DRIVER_DESC, - .date = "20180529", .major = 1, 
.minor = 0, .patchlevel = 0, diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index 0829ceb9967c..49af2b19a85a 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -12,9 +12,9 @@ #include <linux/pm_runtime.h> #include <linux/dma-mapping.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_fourcc.h> @@ -33,7 +33,6 @@ #define DRIVER_NAME "mediatek" #define DRIVER_DESC "Mediatek SoC DRM" -#define DRIVER_DATE "20150513" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -615,7 +614,6 @@ static const struct drm_driver mtk_drm_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, }; diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 0f5a1a54544e..81d2ee37e773 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -16,8 +16,8 @@ #include <linux/platform_device.h> #include <linux/soc/amlogic/meson-canvas.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -105,7 +105,6 @@ static const struct drm_driver meson_driver = { .fops = &fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = "20161109", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c index 97fd7eb765b4..069fdd2dc8f6 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.c +++ b/drivers/gpu/drm/mgag200/mgag200_drv.c @@ -10,8 +10,8 @@ #include <linux/module.h> #include <linux/pci.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_file.h> @@ -97,7 +97,6 @@ static const struct drm_driver mgag200_driver = { .fops = &mgag200_driver_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h index 988967eafbf2..0608fc63e588 100644 --- a/drivers/gpu/drm/mgag200/mgag200_drv.h +++ b/drivers/gpu/drm/mgag200/mgag200_drv.h @@ -25,7 +25,6 @@ #define DRIVER_NAME "mgag200" #define DRIVER_DESC "MGA G200 SE" -#define DRIVER_DATE "20110418" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index 74e01a5dd419..0fd5e0abaf07 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -414,8 +414,10 @@ static int msm_dp_audio_get_eld(struct device *dev, return -ENODEV; } + mutex_lock(&msm_dp_display->connector->eld_mutex); memcpy(buf, msm_dp_display->connector->eld, min(sizeof(msm_dp_display->connector->eld), len)); + mutex_unlock(&msm_dp_display->connector->eld_mutex); return 0; } diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index ffbcc97b5018..34b971490f10 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -11,7 +11,7 @@ #include <linux/of_address.h> #include <linux/uaccess.h> -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include 
<drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_ioctl.h> @@ -910,7 +910,6 @@ static const struct drm_driver msm_driver = { .fops = &fops, .name = "msm", .desc = "MSM Snapdragon DRM", - .date = "20130625", .major = MSM_VERSION_MAJOR, .minor = MSM_VERSION_MINOR, .patchlevel = MSM_VERSION_PATCHLEVEL, diff --git a/drivers/gpu/drm/mxsfb/lcdif_drv.c b/drivers/gpu/drm/mxsfb/lcdif_drv.c index 51ae0b51b1e8..8ee00f59ca82 100644 --- a/drivers/gpu/drm/mxsfb/lcdif_drv.c +++ b/drivers/gpu/drm/mxsfb/lcdif_drv.c @@ -14,9 +14,9 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_encoder.h> #include <drm/drm_fbdev_dma.h> @@ -248,7 +248,6 @@ static const struct drm_driver lcdif_driver = { .fops = &fops, .name = "imx-lcdif", .desc = "i.MX LCDIF Controller DRM", - .date = "20220417", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index 6b95e4eb3e4e..59020862cf65 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -17,9 +17,9 @@ #include <linux/property.h> #include <linux/pm_runtime.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> -#include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> @@ -336,7 +336,6 @@ static const struct drm_driver mxsfb_driver = { .fops = &fops, .name = "mxsfb-drm", .desc = "MXSFB Controller DRM", - .date = "20160824", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index eed579a6c858..8097249612bc 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -1265,8 +1265,8 @@ nv50_mstc_new(struct nv50_mstm *mstm, struct drm_dp_mst_port *port, mstc->mstm = mstm; mstc->port = port; - ret = drm_connector_init(dev, &mstc->connector, &nv50_mstc, - DRM_MODE_CONNECTOR_DisplayPort); + ret = drm_connector_dynamic_init(dev, &mstc->connector, &nv50_mstc, + DRM_MODE_CONNECTOR_DisplayPort, NULL); if (ret) { kfree(*pmstc); *pmstc = NULL; diff --git a/drivers/gpu/drm/nouveau/include/nvif/log.h b/drivers/gpu/drm/nouveau/include/nvif/log.h new file mode 100644 index 000000000000..64f6f8fc6141 --- /dev/null +++ b/drivers/gpu/drm/nouveau/include/nvif/log.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: MIT */ +/* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. */ + +#ifndef __NVIF_LOG_H__ +#define __NVIF_LOG_H__ + +#ifdef CONFIG_DEBUG_FS + +/** + * nvif_log - structure for tracking logging buffers + * @entry: an entry in a list of struct nvif_logs + * @shutdown: pointer to function to call to clean up + * + * Structure used to track logging buffers so that they can be cleaned up + * when the module exits. + * + * The @shutdown function is called when the module exits. It should free all + * backing resources, such as logging buffers. 
+ */ +struct nvif_log { + struct list_head entry; + void (*shutdown)(struct nvif_log *log); +}; + +/** + * nvif_logs - linked list of nvif_log objects + */ +struct nvif_logs { + struct list_head head; +}; + +#define NVIF_LOGS_DECLARE(logs) \ + struct nvif_logs logs = { LIST_HEAD_INIT(logs.head) } + +static inline void nvif_log_shutdown(struct nvif_logs *logs) +{ + if (!list_empty(&logs->head)) { + struct nvif_log *log, *n; + + list_for_each_entry_safe(log, n, &logs->head, entry) { + /* shutdown() should also delete the log entry */ + log->shutdown(log); + } + } +} + +extern struct nvif_logs gsp_logs; + +#endif + +#endif diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h index a2055f2a014a..5c5f4607fcc9 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -5,10 +5,13 @@ #include <core/falcon.h> #include <core/firmware.h> +#include <linux/debugfs.h> + #define GSP_PAGE_SHIFT 12 #define GSP_PAGE_SIZE BIT(GSP_PAGE_SHIFT) struct nvkm_gsp_mem { + struct device *dev; size_t size; void *data; dma_addr_t addr; @@ -219,6 +222,24 @@ struct nvkm_gsp { /* The size of the registry RPC */ size_t registry_rpc_size; + +#ifdef CONFIG_DEBUG_FS + /* + * Logging buffers in debugfs. The wrapper objects need to remain + * in memory until the dentry is deleted. + */ + struct { + struct dentry *parent; + struct dentry *init; + struct dentry *rm; + struct dentry *intr; + struct dentry *pmu; + } debugfs; + struct debugfs_blob_wrapper blob_init; + struct debugfs_blob_wrapper blob_intr; + struct debugfs_blob_wrapper blob_rm; + struct debugfs_blob_wrapper blob_pmu; +#endif }; static inline bool diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index e83db051e851..200e65a7cefc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -313,3 +313,19 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) kfree(drm->debugfs); drm->debugfs = NULL; } + +int +nouveau_module_debugfs_init(void) +{ + nouveau_debugfs_root = debugfs_create_dir("nouveau", NULL); + if (IS_ERR(nouveau_debugfs_root)) + return PTR_ERR(nouveau_debugfs_root); + + return 0; +} + +void +nouveau_module_debugfs_fini(void) +{ + debugfs_remove(nouveau_debugfs_root); +} diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.h b/drivers/gpu/drm/nouveau/nouveau_debugfs.h index 77f0323b38ba..b7617b344ee2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.h +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.h @@ -21,6 +21,11 @@ nouveau_debugfs(struct drm_device *dev) extern void nouveau_drm_debugfs_init(struct drm_minor *); extern int nouveau_debugfs_init(struct nouveau_drm *); extern void nouveau_debugfs_fini(struct nouveau_drm *); + +extern struct dentry *nouveau_debugfs_root; + +int nouveau_module_debugfs_init(void); +void nouveau_module_debugfs_fini(void); #else static inline void nouveau_drm_debugfs_init(struct drm_minor *minor) @@ -37,6 +42,17 @@ nouveau_debugfs_fini(struct nouveau_drm *drm) { } +static inline int +nouveau_module_debugfs_init(void) +{ + return 0; +} + +static inline void +nouveau_module_debugfs_fini(void) +{ +} + #endif #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 107f63f08bd9..21d2d9ca5e85 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -30,8 +30,9 @@ #include <linux/vga_switcheroo.h> #include <linux/mmu_notifier.h> 
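/*
 * Editorial sketch (not part of the patch) of a shutdown() hook that honours
 * the "should also delete the log entry" rule documented for nvif_log above.
 * The wrapper type, its buffer and the allocators are hypothetical:
 */
struct example_gsp_log {
	struct nvif_log log;
	void *buffer;
};

static void example_gsp_log_shutdown(struct nvif_log *log)
{
	struct example_gsp_log *elog = container_of(log, struct example_gsp_log, log);

	list_del(&log->entry);		/* drop it from gsp_logs before freeing */
	kvfree(elog->buffer);
	kfree(elog);
}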
#include <linux/dynamic_debug.h> +#include <linux/debugfs.h> -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem_ttm_helper.h> @@ -47,6 +48,7 @@ #include <nvif/fifo.h> #include <nvif/push006c.h> #include <nvif/user.h> +#include <nvif/log.h> #include <nvif/class.h> #include <nvif/cl0002.h> @@ -113,6 +115,20 @@ static struct drm_driver driver_stub; static struct drm_driver driver_pci; static struct drm_driver driver_platform; +#ifdef CONFIG_DEBUG_FS +struct dentry *nouveau_debugfs_root; + +/** + * gsp_logs - list of nvif_log GSP-RM logging buffers + * + * Head pointer to a a list of nvif_log buffers that is created for each GPU + * upon GSP shutdown if the "keep_gsp_logging" command-line parameter is + * specified. This is used to track the alternative debugfs entries for the + * GSP-RM logs. + */ +NVIF_LOGS_DECLARE(gsp_logs); +#endif + static u64 nouveau_pci_name(struct pci_dev *pdev) { @@ -1326,11 +1342,6 @@ driver_stub = { .name = DRIVER_NAME, .desc = DRIVER_DESC, -#ifdef GIT_REVISION - .date = GIT_REVISION, -#else - .date = DRIVER_DATE, -#endif .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, @@ -1423,6 +1434,8 @@ err_free: static int __init nouveau_drm_init(void) { + int ret; + driver_pci = driver_stub; driver_platform = driver_stub; @@ -1436,6 +1449,10 @@ nouveau_drm_init(void) if (!nouveau_modeset) return 0; + ret = nouveau_module_debugfs_init(); + if (ret) + return ret; + #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER platform_driver_register(&nouveau_platform_driver); #endif @@ -1444,10 +1461,14 @@ nouveau_drm_init(void) nouveau_backlight_ctor(); #ifdef CONFIG_PCI - return pci_register_driver(&nouveau_drm_pci_driver); -#else - return 0; + ret = pci_register_driver(&nouveau_drm_pci_driver); + if (ret) { + nouveau_module_debugfs_fini(); + return ret; + } #endif + + return 0; } static void __exit @@ -1467,6 +1488,12 @@ nouveau_drm_exit(void) #endif if (IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM)) mmu_notifier_synchronize(); + +#ifdef CONFIG_DEBUG_FS + nvif_log_shutdown(&gsp_logs); +#endif + + nouveau_module_debugfs_fini(); } module_init(nouveau_drm_init); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index 685d6ca3d8aa..55abc510067b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -7,7 +7,6 @@ #define DRIVER_NAME "nouveau" #define DRIVER_DESC "nVidia Riva/TNT/GeForce/Quadro/Tesla/Tegra K1+" -#define DRIVER_DATE "20120801" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 4 diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index d586aea30898..58502102926b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -26,6 +26,7 @@ #include <subdev/vfn.h> #include <engine/fifo/chan.h> #include <engine/sec2.h> +#include <nvif/log.h> #include <nvfw/fw.h> @@ -57,6 +58,8 @@ #include <linux/ctype.h> #include <linux/parser.h> +extern struct dentry *nouveau_debugfs_root; + #define GSP_MSG_MIN_SIZE GSP_PAGE_SIZE #define GSP_MSG_MAX_SIZE GSP_PAGE_MIN_SIZE * 16 @@ -121,6 +124,8 @@ r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime) return mqe->data; } + size = ALIGN(repc + GSP_MSG_HDR_SIZE, GSP_PAGE_SIZE); + msg = kvmalloc(repc, GFP_KERNEL); if (!msg) return ERR_PTR(-ENOMEM); @@ -129,19 +134,15 @@ r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int 
*ptime) len = min_t(u32, repc, len); memcpy(msg, mqe->data, len); - rptr += DIV_ROUND_UP(len, GSP_PAGE_SIZE); - if (rptr == gsp->msgq.cnt) - rptr = 0; - repc -= len; if (repc) { mqe = (void *)((u8 *)gsp->shm.msgq.ptr + 0x1000 + 0 * 0x1000); memcpy(msg + len, mqe, repc); - - rptr += DIV_ROUND_UP(repc, GSP_PAGE_SIZE); } + rptr = (rptr + DIV_ROUND_UP(size, GSP_PAGE_SIZE)) % gsp->msgq.cnt; + mb(); (*gsp->msgq.rptr) = rptr; return msg; @@ -163,7 +164,7 @@ r535_gsp_cmdq_push(struct nvkm_gsp *gsp, void *argv) u64 *end; u64 csum = 0; int free, time = 1000000; - u32 wptr, size; + u32 wptr, size, step; u32 off = 0; argc = ALIGN(GSP_MSG_HDR_SIZE + argc, GSP_PAGE_SIZE); @@ -197,7 +198,9 @@ r535_gsp_cmdq_push(struct nvkm_gsp *gsp, void *argv) } cqe = (void *)((u8 *)gsp->shm.cmdq.ptr + 0x1000 + wptr * 0x1000); - size = min_t(u32, argc, (gsp->cmdq.cnt - wptr) * GSP_PAGE_SIZE); + step = min_t(u32, free, (gsp->cmdq.cnt - wptr)); + size = min_t(u32, argc, step * GSP_PAGE_SIZE); + memcpy(cqe, (u8 *)cmd + off, size); wptr += DIV_ROUND_UP(size, 0x1000); @@ -1000,7 +1003,7 @@ r535_gsp_rpc_get_gsp_static_info(struct nvkm_gsp *gsp) } static void -nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem) +nvkm_gsp_mem_dtor(struct nvkm_gsp_mem *mem) { if (mem->data) { /* @@ -1009,19 +1012,35 @@ nvkm_gsp_mem_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_mem *mem) */ memset(mem->data, 0xFF, mem->size); - dma_free_coherent(gsp->subdev.device->dev, mem->size, mem->data, mem->addr); + dma_free_coherent(mem->dev, mem->size, mem->data, mem->addr); + put_device(mem->dev); + memset(mem, 0, sizeof(*mem)); } } +/** + * nvkm_gsp_mem_ctor - constructor for nvkm_gsp_mem objects + * @gsp: gsp pointer + * @size: number of bytes to allocate + * @mem: nvkm_gsp_mem object to initialize + * + * Allocates a block of memory for use with GSP. + * + * This memory block can potentially out-live the driver's remove() callback, + * so we take a device reference to ensure its lifetime. The reference is + * dropped in the destructor. + */ static int nvkm_gsp_mem_ctor(struct nvkm_gsp *gsp, size_t size, struct nvkm_gsp_mem *mem) { - mem->size = size; mem->data = dma_alloc_coherent(gsp->subdev.device->dev, size, &mem->addr, GFP_KERNEL); if (WARN_ON(!mem->data)) return -ENOMEM; + mem->size = size; + mem->dev = get_device(gsp->subdev.device->dev); + return 0; } @@ -1054,8 +1073,8 @@ r535_gsp_postinit(struct nvkm_gsp *gsp) nvkm_wr32(device, 0x110004, 0x00000040); /* Release the DMA buffers that were needed only for boot and init */ - nvkm_gsp_mem_dtor(gsp, &gsp->boot.fw); - nvkm_gsp_mem_dtor(gsp, &gsp->libos); + nvkm_gsp_mem_dtor(&gsp->boot.fw); + nvkm_gsp_mem_dtor(&gsp->libos); return ret; } @@ -2060,6 +2079,215 @@ r535_gsp_rmargs_init(struct nvkm_gsp *gsp, bool resume) return 0; } +#ifdef CONFIG_DEBUG_FS + +/* + * If GSP-RM load fails, then the GSP nvkm object will be deleted, the logging + * debugfs entries will be deleted, and it will not be possible to debug the + * load failure. The keep_gsp_logging parameter tells Nouveau to copy the + * logging buffers to new debugfs entries, and these entries are retained + * until the driver unloads. + */ +static bool keep_gsp_logging; +module_param(keep_gsp_logging, bool, 0444); +MODULE_PARM_DESC(keep_gsp_logging, + "Migrate the GSP-RM logging debugfs entries upon exit"); + +/* + * GSP-RM uses a pseudo-class mechanism to define a variety of per-"engine" + * data structures, and each engine has a "class ID" generated by a + * pre-processor. This is the class ID for the PMU.
+ */ +#define NV_GSP_MSG_EVENT_UCODE_LIBOS_CLASS_PMU 0xf3d722 + +/** + * rpc_ucode_libos_print_v1e_08 - RPC payload for libos print buffers + * @ucode_eng_desc: the engine descriptor + * @libos_print_buf_size: the size of the libos_print_buf[] + * @libos_print_buf: the actual buffer + * + * The engine descriptor is divided into 31:8 "class ID" and 7:0 "instance + * ID". We only care about messages from PMU. + */ +struct rpc_ucode_libos_print_v1e_08 { + u32 ucode_eng_desc; + u32 libos_print_buf_size; + u8 libos_print_buf[]; +}; + +/** + * r535_gsp_msg_libos_print - capture log message from the PMU + * @priv: gsp pointer + * @fn: function number (ignored) + * @repv: pointer to libos print RPC + * @repc: message size + * + * Called when we receive a UCODE_LIBOS_PRINT event RPC from GSP-RM. This RPC + * contains the contents of the libos print buffer from PMU. It is typically + * only written to when PMU encounters an error. + * + * Technically this RPC can be used to pass print buffers from any number of + * GSP-RM engines, but we only expect to receive them for the PMU. + * + * For the PMU, the buffer is 4K in size and the RPC always contains the full + * contents. + */ +static int +r535_gsp_msg_libos_print(void *priv, u32 fn, void *repv, u32 repc) +{ + struct nvkm_gsp *gsp = priv; + struct nvkm_subdev *subdev = &gsp->subdev; + struct rpc_ucode_libos_print_v1e_08 *rpc = repv; + unsigned int class = rpc->ucode_eng_desc >> 8; + + nvkm_debug(subdev, "received libos print from class 0x%x for %u bytes\n", + class, rpc->libos_print_buf_size); + + if (class != NV_GSP_MSG_EVENT_UCODE_LIBOS_CLASS_PMU) { + nvkm_warn(subdev, + "received libos print from unknown class 0x%x\n", + class); + return -ENOMSG; + } + + if (rpc->libos_print_buf_size > GSP_PAGE_SIZE) { + nvkm_error(subdev, "libos print is too large (%u bytes)\n", + rpc->libos_print_buf_size); + return -E2BIG; + } + + memcpy(gsp->blob_pmu.data, rpc->libos_print_buf, rpc->libos_print_buf_size); + + return 0; +} + +/** + * create_debugfs - create a blob debugfs entry + * @gsp: gsp pointer + * @name: name of this dentry + * @blob: blob wrapper + * + * Creates a debugfs entry for a logging buffer with the name 'name'. + */ +static struct dentry *create_debugfs(struct nvkm_gsp *gsp, const char *name, + struct debugfs_blob_wrapper *blob) +{ + struct dentry *dent; + + dent = debugfs_create_blob(name, 0444, gsp->debugfs.parent, blob); + if (IS_ERR(dent)) { + nvkm_error(&gsp->subdev, + "failed to create %s debugfs entry\n", name); + return NULL; + } + + /* + * For some reason, debugfs_create_blob doesn't set the size of the + * dentry, so do that here. See [1] + * + * [1] https://lore.kernel.org/r/linux-fsdevel/20240207200619.3354549-1-ttabi@nvidia.com/ + */ + i_size_write(d_inode(dent), blob->size); + + return dent; +} + +/** + * r535_gsp_libos_debugfs_init - create logging debugfs entries + * @gsp: gsp pointer + * + * Create the debugfs entries. This exposes the log buffers to userspace so + * that an external tool can parse them. + * + * The 'logpmu' contains exception dumps from the PMU. It is written via an + * RPC sent from GSP-RM and must be only 4KB. We create it here because it's + * only useful if there is a debugfs entry to expose it. If we get the PMU + * logging RPC and there is no debugfs entry, the RPC is just ignored. + * + * The blob_init, blob_intr, blob_rm, and blob_pmu objects can't be transient + * because debugfs_create_blob doesn't copy them. + * + * NOTE: OpenRM loads the logging elf image and prints the log messages + * in real-time.
We may add that capability in the future, but that + * requires loading ELF images that are not distributed with the driver and + * adding the parsing code to Nouveau. + * + * Ideally, this should be part of nouveau_debugfs_init(), but that function + * is called too late. We really want to create these debugfs entries before + * r535_gsp_booter_load() is called, so that if GSP-RM fails to initialize, + * there could still be a log to capture. + */ +static void +r535_gsp_libos_debugfs_init(struct nvkm_gsp *gsp) +{ + struct device *dev = gsp->subdev.device->dev; + + /* Create a new debugfs directory with a name unique to this GPU. */ + gsp->debugfs.parent = debugfs_create_dir(dev_name(dev), nouveau_debugfs_root); + if (IS_ERR(gsp->debugfs.parent)) { + nvkm_error(&gsp->subdev, + "failed to create %s debugfs root\n", dev_name(dev)); + return; + } + + gsp->blob_init.data = gsp->loginit.data; + gsp->blob_init.size = gsp->loginit.size; + gsp->blob_intr.data = gsp->logintr.data; + gsp->blob_intr.size = gsp->logintr.size; + gsp->blob_rm.data = gsp->logrm.data; + gsp->blob_rm.size = gsp->logrm.size; + + gsp->debugfs.init = create_debugfs(gsp, "loginit", &gsp->blob_init); + if (!gsp->debugfs.init) + goto error; + + gsp->debugfs.intr = create_debugfs(gsp, "logintr", &gsp->blob_intr); + if (!gsp->debugfs.intr) + goto error; + + gsp->debugfs.rm = create_debugfs(gsp, "logrm", &gsp->blob_rm); + if (!gsp->debugfs.rm) + goto error; + + /* + * Since the PMU buffer is copied from an RPC, it doesn't need to be + * a DMA buffer. + */ + gsp->blob_pmu.size = GSP_PAGE_SIZE; + gsp->blob_pmu.data = kzalloc(gsp->blob_pmu.size, GFP_KERNEL); + if (!gsp->blob_pmu.data) + goto error; + + gsp->debugfs.pmu = create_debugfs(gsp, "logpmu", &gsp->blob_pmu); + if (!gsp->debugfs.pmu) { + kfree(gsp->blob_pmu.data); + goto error; + } + + i_size_write(d_inode(gsp->debugfs.init), gsp->blob_init.size); + i_size_write(d_inode(gsp->debugfs.intr), gsp->blob_intr.size); + i_size_write(d_inode(gsp->debugfs.rm), gsp->blob_rm.size); + i_size_write(d_inode(gsp->debugfs.pmu), gsp->blob_pmu.size); + + r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, + r535_gsp_msg_libos_print, gsp); + + nvkm_debug(&gsp->subdev, "created debugfs GSP-RM logging entries\n"); + + if (keep_gsp_logging) { + nvkm_info(&gsp->subdev, + "logging buffers will be retained on failure\n"); + } + + return; + +error: + debugfs_remove(gsp->debugfs.parent); + gsp->debugfs.parent = NULL; +} + +#endif + static inline u64 r535_gsp_libos_id8(const char *name) { @@ -2110,7 +2338,11 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE. * * The physical address map for the log buffer is stored in the buffer - * itself, starting with offset 1. Offset 0 contains the "put" pointer. + * itself, starting with offset 1. Offset 0 contains the "put" pointer (pp). + * Initially, pp is equal to 0. If the buffer has valid logging data in it, + * then pp points to index into the buffer where the next logging entry will + * be written. Therefore, the logging data is valid if: + * 1 <= pp < sizeof(buffer)/sizeof(u64) * * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is * configured for a larger page size (e.g. 
64K pages), we need to give @@ -2181,6 +2413,11 @@ r535_gsp_libos_init(struct nvkm_gsp *gsp) args[3].size = gsp->rmargs.size; args[3].kind = LIBOS_MEMORY_REGION_CONTIGUOUS; args[3].loc = LIBOS_MEMORY_REGION_LOC_SYSMEM; + +#ifdef CONFIG_DEBUG_FS + r535_gsp_libos_debugfs_init(gsp); +#endif + return 0; } @@ -2234,8 +2471,8 @@ static void nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3) { nvkm_gsp_sg_free(gsp->subdev.device, &rx3->lvl2); - nvkm_gsp_mem_dtor(gsp, &rx3->lvl1); - nvkm_gsp_mem_dtor(gsp, &rx3->lvl0); + nvkm_gsp_mem_dtor(&rx3->lvl1); + nvkm_gsp_mem_dtor(&rx3->lvl0); } /** @@ -2323,9 +2560,9 @@ nvkm_gsp_radix3_sg(struct nvkm_gsp *gsp, struct sg_table *sgt, u64 size, if (ret) { lvl2_fail: - nvkm_gsp_mem_dtor(gsp, &rx3->lvl1); + nvkm_gsp_mem_dtor(&rx3->lvl1); lvl1_fail: - nvkm_gsp_mem_dtor(gsp, &rx3->lvl0); + nvkm_gsp_mem_dtor(&rx3->lvl0); } return ret; @@ -2417,7 +2654,7 @@ r535_gsp_init(struct nvkm_gsp *gsp) done: if (gsp->sr.meta.data) { - nvkm_gsp_mem_dtor(gsp, &gsp->sr.meta); + nvkm_gsp_mem_dtor(&gsp->sr.meta); nvkm_gsp_radix3_dtor(gsp, &gsp->sr.radix3); nvkm_gsp_sg_free(gsp->subdev.device, &gsp->sr.sgt); return ret; @@ -2491,6 +2728,222 @@ r535_gsp_dtor_fws(struct nvkm_gsp *gsp) gsp->fws.rm = NULL; } +#ifdef CONFIG_DEBUG_FS + +struct r535_gsp_log { + struct nvif_log log; + + /* + * Logging buffers in debugfs. The wrapper objects need to remain + * in memory until the dentry is deleted. + */ + struct dentry *debugfs_logging_dir; + struct debugfs_blob_wrapper blob_init; + struct debugfs_blob_wrapper blob_intr; + struct debugfs_blob_wrapper blob_rm; + struct debugfs_blob_wrapper blob_pmu; +}; + +/** + * r535_debugfs_shutdown - delete GSP-RM logging buffers for one GPU + * @_log: nvif_log struct for this GPU + * + * Called when the driver is shutting down, to clean up the retained GSP-RM + * logging buffers. + */ +static void r535_debugfs_shutdown(struct nvif_log *_log) +{ + struct r535_gsp_log *log = container_of(_log, struct r535_gsp_log, log); + + debugfs_remove(log->debugfs_logging_dir); + + kfree(log->blob_init.data); + kfree(log->blob_intr.data); + kfree(log->blob_rm.data); + kfree(log->blob_pmu.data); + + /* We also need to delete the list object */ + kfree(log); +} + +/** + * is_empty - return true if the logging buffer was never written to + * @b: blob wrapper with ->data field pointing to logging buffer + * + * The first 64-bit field of loginit, and logintr, and logrm is the 'put' + * pointer, and it is initialized to 0. It's a dword-based index into the + * circular buffer, indicating where the next printf write will be made. + * + * If the pointer is still 0 when GSP-RM is shut down, that means that the + * buffer was never written to, so it can be ignored. + * + * This test also works for logpmu, even though it doesn't have a put pointer. + */ +static bool is_empty(const struct debugfs_blob_wrapper *b) +{ + u64 *put = b->data; + + return put ? (*put == 0) : true; +} + +/** + * r535_gsp_copy_log - preserve the logging buffers in a blob + * + * When GSP shuts down, the nvkm_gsp object and all its memory is deleted. + * To preserve the logging buffers, the buffers need to be copied, but only + * if they actually have data. 
+ */ +static int r535_gsp_copy_log(struct dentry *parent, + const char *name, + const struct debugfs_blob_wrapper *s, + struct debugfs_blob_wrapper *t) +{ + struct dentry *dent; + void *p; + + if (is_empty(s)) + return 0; + + /* The original buffers will be deleted */ + p = kmemdup(s->data, s->size, GFP_KERNEL); + if (!p) + return -ENOMEM; + + t->data = p; + t->size = s->size; + + dent = debugfs_create_blob(name, 0444, parent, t); + if (IS_ERR(dent)) { + kfree(p); + memset(t, 0, sizeof(*t)); + return PTR_ERR(dent); + } + + i_size_write(d_inode(dent), t->size); + + return 0; +} + +/** + * r535_gsp_retain_logging - copy logging buffers to new debugfs root + * @gsp: gsp pointer + * + * If keep_gsp_logging is enabled, then we want to preserve the GSP-RM logging + * buffers and their debugfs entries, but all those objects would normally be + * deleted if GSP-RM fails to load. + * + * To preserve the logging buffers, we need to: + * + * 1) Allocate new buffers and copy the logs into them, so that the original + * DMA buffers can be released. + * + * 2) Preserve the directories. We don't need to save single dentries because + * we're going to delete the parent when the driver unloads. + * + * If anything fails in this process, then all the dentries need to be + * deleted. We don't need to deallocate the original logging buffers because + * the caller will do that regardless. + */ +static void r535_gsp_retain_logging(struct nvkm_gsp *gsp) +{ + struct device *dev = gsp->subdev.device->dev; + struct r535_gsp_log *log = NULL; + int ret; + + if (!keep_gsp_logging || !gsp->debugfs.parent) { + /* Nothing to do */ + goto exit; + } + + /* Check to make sure at least one buffer has data. */ + if (is_empty(&gsp->blob_init) && is_empty(&gsp->blob_intr) && + is_empty(&gsp->blob_rm) && is_empty(&gsp->blob_pmu)) { + nvkm_warn(&gsp->subdev, "all logging buffers are empty\n"); + goto exit; + } + + log = kzalloc(sizeof(*log), GFP_KERNEL); + if (!log) + goto error; + + /* + * Since the nvkm_gsp object is going away, the debugfs_blob_wrapper + * objects are also being deleted, which means the dentries will no + * longer be valid. Delete the existing entries so that we can create + * new ones with the same name.
+ */ + debugfs_remove(gsp->debugfs.init); + debugfs_remove(gsp->debugfs.intr); + debugfs_remove(gsp->debugfs.rm); + debugfs_remove(gsp->debugfs.pmu); + + ret = r535_gsp_copy_log(gsp->debugfs.parent, "loginit", &gsp->blob_init, &log->blob_init); + if (ret) + goto error; + + ret = r535_gsp_copy_log(gsp->debugfs.parent, "logintr", &gsp->blob_intr, &log->blob_intr); + if (ret) + goto error; + + ret = r535_gsp_copy_log(gsp->debugfs.parent, "logrm", &gsp->blob_rm, &log->blob_rm); + if (ret) + goto error; + + ret = r535_gsp_copy_log(gsp->debugfs.parent, "logpmu", &gsp->blob_pmu, &log->blob_pmu); + if (ret) + goto error; + + /* The nvkm_gsp object is going away, so save the dentry */ + log->debugfs_logging_dir = gsp->debugfs.parent; + + log->log.shutdown = r535_debugfs_shutdown; + list_add(&log->log.entry, &gsp_logs.head); + + nvkm_warn(&gsp->subdev, + "logging buffers migrated to /sys/kernel/debug/nouveau/%s\n", + dev_name(dev)); + + return; + +error: + nvkm_warn(&gsp->subdev, "failed to migrate logging buffers\n"); + +exit: + debugfs_remove(gsp->debugfs.parent); + + if (log) { + kfree(log->blob_init.data); + kfree(log->blob_intr.data); + kfree(log->blob_rm.data); + kfree(log->blob_pmu.data); + kfree(log); + } +} + +#endif + +/** + * r535_gsp_libos_debugfs_fini - cleanup/retain log buffers on shutdown + * @gsp: gsp pointer + * + * If the log buffers are exposed via debugfs, the data for those entries + * needs to be cleaned up when the GSP device shuts down. + */ +static void +r535_gsp_libos_debugfs_fini(struct nvkm_gsp __maybe_unused *gsp) +{ +#ifdef CONFIG_DEBUG_FS + r535_gsp_retain_logging(gsp); + + /* + * Unlike the other buffers, the PMU blob is a kmalloc'd buffer that + * exists only if the debugfs entries were created. + */ + kfree(gsp->blob_pmu.data); + gsp->blob_pmu.data = NULL; +#endif +} + void r535_gsp_dtor(struct nvkm_gsp *gsp) { @@ -2498,7 +2951,7 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) mutex_destroy(&gsp->client_id.mutex); nvkm_gsp_radix3_dtor(gsp, &gsp->radix3); - nvkm_gsp_mem_dtor(gsp, &gsp->sig); + nvkm_gsp_mem_dtor(&gsp->sig); nvkm_firmware_dtor(&gsp->fw); nvkm_falcon_fw_dtor(&gsp->booter.unload); @@ -2509,12 +2962,15 @@ r535_gsp_dtor(struct nvkm_gsp *gsp) r535_gsp_dtor_fws(gsp); - nvkm_gsp_mem_dtor(gsp, &gsp->rmargs); - nvkm_gsp_mem_dtor(gsp, &gsp->wpr_meta); - nvkm_gsp_mem_dtor(gsp, &gsp->shm.mem); - nvkm_gsp_mem_dtor(gsp, &gsp->loginit); - nvkm_gsp_mem_dtor(gsp, &gsp->logintr); - nvkm_gsp_mem_dtor(gsp, &gsp->logrm); + nvkm_gsp_mem_dtor(&gsp->rmargs); + nvkm_gsp_mem_dtor(&gsp->wpr_meta); + nvkm_gsp_mem_dtor(&gsp->shm.mem); + + r535_gsp_libos_debugfs_fini(gsp); + + nvkm_gsp_mem_dtor(&gsp->loginit); + nvkm_gsp_mem_dtor(&gsp->logintr); + nvkm_gsp_mem_dtor(&gsp->logrm); } int diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index e27376121606..054b71dba6a7 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -28,7 +28,6 @@ #define DRIVER_NAME MODULE_NAME #define DRIVER_DESC "OMAP DRM" -#define DRIVER_DATE "20110917" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 @@ -653,7 +652,6 @@ static const struct drm_driver omap_drm_driver = { .fops = &omapdriver_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index f4bd0c6e3f34..7b6396890681 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ 
b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -6,7 +6,7 @@ #include <linux/fb.h> -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> diff --git a/drivers/gpu/drm/panel/panel-edp.c b/drivers/gpu/drm/panel/panel-edp.c index 94a46241dece..f8511fe5fb0d 100644 --- a/drivers/gpu/drm/panel/panel-edp.c +++ b/drivers/gpu/drm/panel/panel-edp.c @@ -1802,6 +1802,12 @@ static const struct panel_delay delay_200_500_e50_po2e200 = { .powered_on_to_enable = 200, }; +static const struct panel_delay delay_200_150_e50 = { + .hpd_absent = 200, + .unprepare = 150, + .enable = 50, +}; + #define EDP_PANEL_ENTRY(vend_chr_0, vend_chr_1, vend_chr_2, product_id, _delay, _name) \ { \ .ident = { \ @@ -1913,6 +1919,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b56, &delay_200_500_e80, "NT140FHM-N47"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0b66, &delay_200_500_e80, "NE140WUM-N6G"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0c20, &delay_200_500_e80, "NT140FHM-N47"), + EDP_PANEL_ENTRY('B', 'O', 'E', 0x0c93, &delay_200_500_e200, "Unknown"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0cb6, &delay_200_500_e200, "NT116WHM-N44"), EDP_PANEL_ENTRY('B', 'O', 'E', 0x0cfa, &delay_200_500_e50, "NV116WHM-A4D"), @@ -1963,6 +1970,7 @@ static const struct edp_panel_entry edp_panels[] = { EDP_PANEL_ENTRY('K', 'D', 'B', 0x1118, &delay_200_500_e50, "KD116N29-30NK-A005"), EDP_PANEL_ENTRY('K', 'D', 'B', 0x1120, &delay_200_500_e80_d50, "116N29-30NK-C007"), EDP_PANEL_ENTRY('K', 'D', 'B', 0x1212, &delay_200_500_e50, "KD116N0930A16"), + EDP_PANEL_ENTRY('K', 'D', 'B', 0x1707, &delay_200_150_e50, "KD116N2130B12"), EDP_PANEL_ENTRY('K', 'D', 'C', 0x044f, &delay_200_500_e50, "KD116N9-30NH-F3"), EDP_PANEL_ENTRY('K', 'D', 'C', 0x05f1, &delay_200_500_e80_d50, "KD116N5-30NV-G7"), diff --git a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c index a9b5dad70bc1..87bbb25d119a 100644 --- a/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c +++ b/drivers/gpu/drm/panel/panel-orisetech-otm8009a.c @@ -9,6 +9,7 @@ #include <linux/backlight.h> #include <linux/delay.h> #include <linux/gpio/consumer.h> +#include <linux/mod_devicetable.h> #include <linux/module.h> #include <linux/regulator/consumer.h> diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index 4618c892cdd6..e10e469aa7a6 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -400,7 +400,7 @@ static int rpi_touchscreen_probe(struct i2c_client *i2c) rpi_touchscreen_i2c_write(ts, REG_POWERON, 0); /* Look up the DSI host. It needs to probe before we do. 
*/ - endpoint = of_graph_get_next_endpoint(dev->of_node, NULL); + endpoint = of_graph_get_endpoint_by_regs(dev->of_node, 0, -1); if (!endpoint) return -ENODEV; diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c index ed53787d1dea..364f1c9a16d9 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63j0x03.c @@ -11,6 +11,7 @@ #include <linux/backlight.h> #include <linux/delay.h> #include <linux/gpio/consumer.h> +#include <linux/mod_devicetable.h> #include <linux/module.h> #include <linux/regulator/consumer.h> diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c index a0e5698275a5..6917ffda5b2b 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c @@ -15,6 +15,7 @@ #include <linux/delay.h> #include <linux/gpio/consumer.h> #include <linux/module.h> +#include <linux/property.h> #include <linux/regulator/consumer.h> #include <linux/media-bus-format.h> diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 222c170dde8b..dbad12a354b6 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -3222,6 +3222,33 @@ static const struct panel_desc mitsubishi_aa084xe01 = { .bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_SAMPLE_NEGEDGE, }; +static const struct display_timing multi_inno_mi0700a2t_30_timing = { + .pixelclock = { 26400000, 33000000, 46800000 }, + .hactive = { 800, 800, 800 }, + .hfront_porch = { 16, 204, 354 }, + .hback_porch = { 46, 46, 46 }, + .hsync_len = { 1, 6, 40 }, + .vactive = { 480, 480, 480 }, + .vfront_porch = { 7, 22, 147 }, + .vback_porch = { 23, 23, 23 }, + .vsync_len = { 1, 3, 20 }, + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH, +}; + +static const struct panel_desc multi_inno_mi0700a2t_30 = { + .timings = &multi_inno_mi0700a2t_30_timing, + .num_timings = 1, + .bpc = 6, + .size = { + .width = 153, + .height = 92, + }, + .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing multi_inno_mi0700s4t_6_timing = { .pixelclock = { 29000000, 33000000, 38000000 }, .hactive = { 800, 800, 800 }, @@ -3313,6 +3340,33 @@ static const struct panel_desc multi_inno_mi1010ait_1cp = { .connector_type = DRM_MODE_CONNECTOR_LVDS, }; +static const struct display_timing multi_inno_mi1010z1t_1cp11_timing = { + .pixelclock = { 40800000, 51200000, 67200000 }, + .hactive = { 1024, 1024, 1024 }, + .hfront_porch = { 30, 110, 130 }, + .hback_porch = { 30, 110, 130 }, + .hsync_len = { 30, 100, 116 }, + .vactive = { 600, 600, 600 }, + .vfront_porch = { 4, 13, 80 }, + .vback_porch = { 4, 13, 80 }, + .vsync_len = { 2, 9, 40 }, + .flags = DISPLAY_FLAGS_HSYNC_LOW | DISPLAY_FLAGS_VSYNC_LOW | + DISPLAY_FLAGS_DE_HIGH, +}; + +static const struct panel_desc multi_inno_mi1010z1t_1cp11 = { + .timings = &multi_inno_mi1010z1t_1cp11_timing, + .num_timings = 1, + .bpc = 6, + .size = { + .width = 260, + .height = 162, + }, + .bus_format = MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, + .bus_flags = DRM_BUS_FLAG_DE_HIGH, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing nec_nl12880bc20_05_timing = { .pixelclock = { 67000000, 71000000, 75000000 }, .hactive = { 1280, 1280, 1280 }, @@ -4280,6 +4334,45 @@ static const struct panel_desc 
tianma_tm070jvhg33 = { .bus_flags = DRM_BUS_FLAG_DE_HIGH, }; +/* + * The datasheet computes total blanking as back porch + front porch, not + * including sync pulse width. This is for both H and V. To make the total + * blanking and period correct, subtract the pulse width from the front + * porch. + * + * This works well for the Min and Typ values, but for Max values the sync + * pulse width is higher than back porch + front porch, so work around that + * by reducing the Max sync length value to 1 and then treating the Max + * porches as in the Min and Typ cases. + * + * Exact datasheet values are added as a comment where they differ from the + * ones implemented for the above reason. + */ +static const struct display_timing tianma_tm070jdhg34_00_timing = { + .pixelclock = { 68400000, 71900000, 78100000 }, + .hactive = { 1280, 1280, 1280 }, + .hfront_porch = { 130, 138, 158 }, /* 131, 139, 159 */ + .hback_porch = { 5, 5, 5 }, + .hsync_len = { 1, 1, 1 }, /* 1, 1, 256 */ + .vactive = { 800, 800, 800 }, + .vfront_porch = { 2, 39, 98 }, /* 3, 40, 99 */ + .vback_porch = { 2, 2, 2 }, + .vsync_len = { 1, 1, 1 }, /* 1, 1, 128 */ + .flags = DISPLAY_FLAGS_DE_HIGH, +}; + +static const struct panel_desc tianma_tm070jdhg34_00 = { + .timings = &tianma_tm070jdhg34_00_timing, + .num_timings = 1, + .bpc = 8, + .size = { + .width = 150, /* 149.76 */ + .height = 94, /* 93.60 */ + }, + .bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, + .connector_type = DRM_MODE_CONNECTOR_LVDS, +}; + static const struct display_timing tianma_tm070rvhg71_timing = { .pixelclock = { 27700000, 29200000, 39600000 }, .hactive = { 800, 800, 800 }, @@ -4906,6 +4999,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "mitsubishi,aa084xe01", .data = &mitsubishi_aa084xe01, }, { + .compatible = "multi-inno,mi0700a2t-30", + .data = &multi_inno_mi0700a2t_30, + }, { .compatible = "multi-inno,mi0700s4t-6", .data = &multi_inno_mi0700s4t_6, }, { @@ -4915,6 +5011,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "multi-inno,mi1010ait-1cp", .data = &multi_inno_mi1010ait_1cp, }, { + .compatible = "multi-inno,mi1010z1t-1cp11", + .data = &multi_inno_mi1010z1t_1cp11, + }, { .compatible = "nec,nl12880bc20-05", .data = &nec_nl12880bc20_05, }, { @@ -5023,6 +5122,9 @@ static const struct of_device_id platform_of_match[] = { .compatible = "tianma,tm070jdhg30", .data = &tianma_tm070jdhg30, }, { + .compatible = "tianma,tm070jdhg34-00", + .data = &tianma_tm070jdhg34_00, + }, { .compatible = "tianma,tm070jvhg33", .data = &tianma_tm070jvhg33, }, { diff --git a/drivers/gpu/drm/panel/panel-visionox-rm69299.c b/drivers/gpu/drm/panel/panel-visionox-rm69299.c index 272490b9565b..be3a9797fbce 100644 --- a/drivers/gpu/drm/panel/panel-visionox-rm69299.c +++ b/drivers/gpu/drm/panel/panel-visionox-rm69299.c @@ -193,7 +193,6 @@ static int visionox_rm69299_probe(struct mipi_dsi_device *dsi) mipi_dsi_set_drvdata(dsi, ctx); - ctx->panel.dev = dev; ctx->dsi = dsi; ctx->supplies[0].supply = "vdda"; @@ -201,13 +200,11 @@ static int visionox_rm69299_probe(struct mipi_dsi_device *dsi) ctx->supplies[1].supply = "vdd3p3"; ctx->supplies[1].init_load_uA = 13200; - ret = devm_regulator_bulk_get(ctx->panel.dev, ARRAY_SIZE(ctx->supplies), - ctx->supplies); + ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ctx->supplies), ctx->supplies); if (ret < 0) return ret; - ctx->reset_gpio = devm_gpiod_get(ctx->panel.dev, - "reset", GPIOD_OUT_LOW); + ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(ctx->reset_gpio)) { 
dev_err(dev, "cannot get reset gpio %ld\n", PTR_ERR(ctx->reset_gpio)); return PTR_ERR(ctx->reset_gpio); @@ -215,8 +212,6 @@ static int visionox_rm69299_probe(struct mipi_dsi_device *dsi) drm_panel_init(&ctx->panel, dev, &visionox_rm69299_drm_funcs, DRM_MODE_CONNECTOR_DSI); - ctx->panel.dev = dev; - ctx->panel.funcs = &visionox_rm69299_drm_funcs; drm_panel_add(&ctx->panel); dsi->lanes = 4; diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c index ee3864476eb9..0f3935556ac7 100644 --- a/drivers/gpu/drm/panfrost/panfrost_drv.c +++ b/drivers/gpu/drm/panfrost/panfrost_drv.c @@ -636,7 +636,6 @@ static const struct drm_driver panfrost_drm_driver = { .fops = &panfrost_drm_driver_fops, .name = "panfrost", .desc = "panfrost DRM", - .date = "20180908", .major = 1, .minor = 3, diff --git a/drivers/gpu/drm/panfrost/panfrost_gpu.c b/drivers/gpu/drm/panfrost/panfrost_gpu.c index f5abde3866fb..174e190ba40f 100644 --- a/drivers/gpu/drm/panfrost/panfrost_gpu.c +++ b/drivers/gpu/drm/panfrost/panfrost_gpu.c @@ -236,6 +236,10 @@ static const struct panfrost_model gpu_models[] = { */ GPU_MODEL(g57, 0x9003, GPU_REV(g57, 0, 0)), + + /* MediaTek MT8188 Mali-G57 MC3 */ + GPU_MODEL(g57, 0x9093, + GPU_REV(g57, 0, 0)), }; static void panfrost_gpu_init_features(struct panfrost_device *pfdev) diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.c b/drivers/gpu/drm/panthor/panthor_devfreq.c index ecc7a52bd688..3686515d368d 100644 --- a/drivers/gpu/drm/panthor/panthor_devfreq.c +++ b/drivers/gpu/drm/panthor/panthor_devfreq.c @@ -243,26 +243,26 @@ int panthor_devfreq_init(struct panthor_device *ptdev) return 0; } -int panthor_devfreq_resume(struct panthor_device *ptdev) +void panthor_devfreq_resume(struct panthor_device *ptdev) { struct panthor_devfreq *pdevfreq = ptdev->devfreq; if (!pdevfreq->devfreq) - return 0; + return; panthor_devfreq_reset(pdevfreq); - return devfreq_resume_device(pdevfreq->devfreq); + drm_WARN_ON(&ptdev->base, devfreq_resume_device(pdevfreq->devfreq)); } -int panthor_devfreq_suspend(struct panthor_device *ptdev) +void panthor_devfreq_suspend(struct panthor_device *ptdev) { struct panthor_devfreq *pdevfreq = ptdev->devfreq; if (!pdevfreq->devfreq) - return 0; + return; - return devfreq_suspend_device(pdevfreq->devfreq); + drm_WARN_ON(&ptdev->base, devfreq_suspend_device(pdevfreq->devfreq)); } void panthor_devfreq_record_busy(struct panthor_device *ptdev) diff --git a/drivers/gpu/drm/panthor/panthor_devfreq.h b/drivers/gpu/drm/panthor/panthor_devfreq.h index 83a5c9522493..b7631de695f7 100644 --- a/drivers/gpu/drm/panthor/panthor_devfreq.h +++ b/drivers/gpu/drm/panthor/panthor_devfreq.h @@ -12,8 +12,8 @@ struct panthor_devfreq; int panthor_devfreq_init(struct panthor_device *ptdev); -int panthor_devfreq_resume(struct panthor_device *ptdev); -int panthor_devfreq_suspend(struct panthor_device *ptdev); +void panthor_devfreq_resume(struct panthor_device *ptdev); +void panthor_devfreq_suspend(struct panthor_device *ptdev); void panthor_devfreq_record_busy(struct panthor_device *ptdev); void panthor_devfreq_record_idle(struct panthor_device *ptdev); diff --git a/drivers/gpu/drm/panthor/panthor_device.c b/drivers/gpu/drm/panthor/panthor_device.c index 6fbff516c1c1..0a37cfeeb181 100644 --- a/drivers/gpu/drm/panthor/panthor_device.c +++ b/drivers/gpu/drm/panthor/panthor_device.c @@ -22,6 +22,24 @@ #include "panthor_regs.h" #include "panthor_sched.h" +static int panthor_gpu_coherency_init(struct panthor_device *ptdev) +{ + ptdev->coherent = 
device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; + + if (!ptdev->coherent) + return 0; + + /* Check if the ACE-Lite coherency protocol is actually supported by the GPU. + * ACE protocol has never been supported for command stream frontend GPUs. + */ + if ((gpu_read(ptdev, GPU_COHERENCY_FEATURES) & + GPU_COHERENCY_PROT_BIT(ACE_LITE))) + return 0; + + drm_err(&ptdev->base, "Coherency not supported by the device"); + return -ENOTSUPP; +} + static int panthor_clk_init(struct panthor_device *ptdev) { ptdev->clks.core = devm_clk_get(ptdev->base.dev, NULL); @@ -156,7 +174,9 @@ int panthor_device_init(struct panthor_device *ptdev) struct page *p; int ret; - ptdev->coherent = device_get_dma_attr(ptdev->base.dev) == DEV_DMA_COHERENT; + ret = panthor_gpu_coherency_init(ptdev); + if (ret) + return ret; init_completion(&ptdev->unplug.done); ret = drmm_mutex_init(&ptdev->base, &ptdev->unplug.lock); @@ -415,6 +435,22 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, struct vm_area_struct * return 0; } +static int panthor_device_resume_hw_components(struct panthor_device *ptdev) +{ + int ret; + + panthor_gpu_resume(ptdev); + panthor_mmu_resume(ptdev); + + ret = panthor_fw_resume(ptdev); + if (!ret) + return 0; + + panthor_mmu_suspend(ptdev); + panthor_gpu_suspend(ptdev); + return ret; +} + int panthor_device_resume(struct device *dev) { struct panthor_device *ptdev = dev_get_drvdata(dev); @@ -437,22 +473,20 @@ int panthor_device_resume(struct device *dev) if (ret) goto err_disable_stacks_clk; - ret = panthor_devfreq_resume(ptdev); - if (ret) - goto err_disable_coregroup_clk; + panthor_devfreq_resume(ptdev); if (panthor_device_is_initialized(ptdev) && drm_dev_enter(&ptdev->base, &cookie)) { - panthor_gpu_resume(ptdev); - panthor_mmu_resume(ptdev); - ret = drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); - if (!ret) { - panthor_sched_resume(ptdev); - } else { - panthor_mmu_suspend(ptdev); - panthor_gpu_suspend(ptdev); + ret = panthor_device_resume_hw_components(ptdev); + if (ret && ptdev->reset.fast) { + drm_err(&ptdev->base, "Fast reset failed, trying a slow reset"); + ptdev->reset.fast = false; + ret = panthor_device_resume_hw_components(ptdev); } + if (!ret) + panthor_sched_resume(ptdev); + drm_dev_exit(cookie); if (ret) @@ -476,8 +510,6 @@ int panthor_device_resume(struct device *dev) err_suspend_devfreq: panthor_devfreq_suspend(ptdev); - -err_disable_coregroup_clk: clk_disable_unprepare(ptdev->clks.coregroup); err_disable_stacks_clk: @@ -488,13 +520,14 @@ err_disable_core_clk: err_set_suspended: atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_SUSPENDED); + atomic_set(&ptdev->pm.recovery_needed, 1); return ret; } int panthor_device_suspend(struct device *dev) { struct panthor_device *ptdev = dev_get_drvdata(dev); - int ret, cookie; + int cookie; if (atomic_read(&ptdev->pm.state) != PANTHOR_DEVICE_PM_STATE_ACTIVE) return -EINVAL; @@ -526,36 +559,11 @@ int panthor_device_suspend(struct device *dev) drm_dev_exit(cookie); } - ret = panthor_devfreq_suspend(ptdev); - if (ret) { - if (panthor_device_is_initialized(ptdev) && - drm_dev_enter(&ptdev->base, &cookie)) { - panthor_gpu_resume(ptdev); - panthor_mmu_resume(ptdev); - drm_WARN_ON(&ptdev->base, panthor_fw_resume(ptdev)); - panthor_sched_resume(ptdev); - drm_dev_exit(cookie); - } - - goto err_set_active; - } + panthor_devfreq_suspend(ptdev); clk_disable_unprepare(ptdev->clks.coregroup); clk_disable_unprepare(ptdev->clks.stacks); clk_disable_unprepare(ptdev->clks.core); atomic_set(&ptdev->pm.state, 
PANTHOR_DEVICE_PM_STATE_SUSPENDED); return 0; - -err_set_active: - /* If something failed and we have to revert back to an - * active state, we also need to clear the MMIO userspace - * mappings, so any dumb pages that were mapped while we - * were trying to suspend gets invalidated. - */ - mutex_lock(&ptdev->pm.mmio_lock); - atomic_set(&ptdev->pm.state, PANTHOR_DEVICE_PM_STATE_ACTIVE); - unmap_mapping_range(ptdev->base.anon_inode->i_mapping, - DRM_PANTHOR_USER_MMIO_OFFSET, 0, 1); - mutex_unlock(&ptdev->pm.mmio_lock); - return ret; } diff --git a/drivers/gpu/drm/panthor/panthor_device.h b/drivers/gpu/drm/panthor/panthor_device.h index 0e68f5a70d20..da6574021664 100644 --- a/drivers/gpu/drm/panthor/panthor_device.h +++ b/drivers/gpu/drm/panthor/panthor_device.h @@ -9,6 +9,7 @@ #include <linux/atomic.h> #include <linux/io-pgtable.h> #include <linux/regulator/consumer.h> +#include <linux/pm_runtime.h> #include <linux/sched.h> #include <linux/spinlock.h> @@ -156,6 +157,17 @@ struct panthor_device { /** @pending: Set to true if a reset is pending. */ atomic_t pending; + + /** + * @fast: True if the post_reset logic can proceed with a fast reset. + * + * A fast reset is just a reset where the driver doesn't reload the FW sections. + * + * Any time the firmware is properly suspended, a fast reset can take place. + * On the other hand, if the halt operation failed, the driver will reload + * all FW sections to make sure we start from a fresh state. + */ + bool fast; } reset; /** @pm: Power management related data. */ @@ -180,6 +192,9 @@ struct panthor_device { * is suspended. */ struct page *dummy_latest_flush; + + /** @recovery_needed: True when a resume attempt failed. */ + atomic_t recovery_needed; } pm; /** @profile_mask: User-set profiling flags for job accounting. */ @@ -243,6 +258,28 @@ int panthor_device_mmap_io(struct panthor_device *ptdev, int panthor_device_resume(struct device *dev); int panthor_device_suspend(struct device *dev); +static inline int panthor_device_resume_and_get(struct panthor_device *ptdev) +{ + int ret = pm_runtime_resume_and_get(ptdev->base.dev); + + /* If the resume failed, we need to clear the runtime_error, which + * can be done by forcing the RPM state to suspended. If multiple + * threads called panthor_device_resume_and_get(), we only want + * one of them to update the state, hence the cmpxchg. Note that a + * thread might enter panthor_device_resume_and_get() and call + * pm_runtime_resume_and_get() after another thread had attempted + * to resume and failed. This means we will end up with an error + * without even attempting a resume ourselves. The only risk here + * is to report an error when the second resume attempt might have + * succeeded. Given resume errors are not expected, this is probably + * something we can live with.
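For illustration, a hedged sketch of how a caller is expected to use the panthor_device_resume_and_get() helper being added here; the function below is hypothetical and not part of the patch, but the real callers converted later in this series (e.g. panthor_query_timestamp_info()) pair the helper with a runtime-PM put once the hardware access is done:

/* Sketch only: an ioctl-style path that needs the GPU powered. */
static int example_hw_query(struct panthor_device *ptdev)
{
	int ret;

	ret = panthor_device_resume_and_get(ptdev);
	if (ret)
		return ret;

	/* ... access hardware while the device is guaranteed resumed ... */

	pm_runtime_put(ptdev->base.dev);
	return 0;
}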
+ */ + if (ret && atomic_cmpxchg(&ptdev->pm.recovery_needed, 1, 0) == 1) + pm_runtime_set_suspended(ptdev->base.dev); + + return ret; +} + enum drm_panthor_exception_type { DRM_PANTHOR_EXCEPTION_OK = 0x00, DRM_PANTHOR_EXCEPTION_TERMINATED = 0x04, diff --git a/drivers/gpu/drm/panthor/panthor_drv.c b/drivers/gpu/drm/panthor/panthor_drv.c index 0b3fbee3d37a..d5dcd3d1b33a 100644 --- a/drivers/gpu/drm/panthor/panthor_drv.c +++ b/drivers/gpu/drm/panthor/panthor_drv.c @@ -763,7 +763,7 @@ static int panthor_query_timestamp_info(struct panthor_device *ptdev, { int ret; - ret = pm_runtime_resume_and_get(ptdev->base.dev); + ret = panthor_device_resume_and_get(ptdev); if (ret) return ret; @@ -1493,6 +1493,7 @@ static void panthor_debugfs_init(struct drm_minor *minor) * - 1.1 - adds DEV_QUERY_TIMESTAMP_INFO query * - 1.2 - adds DEV_QUERY_GROUP_PRIORITIES_INFO query * - adds PANTHOR_GROUP_PRIORITY_REALTIME priority + * - 1.3 - adds DRM_PANTHOR_GROUP_STATE_INNOCENT flag */ static const struct drm_driver panthor_drm_driver = { .driver_features = DRIVER_RENDER | DRIVER_GEM | DRIVER_SYNCOBJ | @@ -1505,9 +1506,8 @@ static const struct drm_driver panthor_drm_driver = { .fops = &panthor_drm_driver_fops, .name = "panthor", .desc = "Panthor DRM driver", - .date = "20230801", .major = 1, - .minor = 2, + .minor = 3, .gem_create_object = panthor_gem_create_object, .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table, diff --git a/drivers/gpu/drm/panthor/panthor_fw.c b/drivers/gpu/drm/panthor/panthor_fw.c index ecca5565ce41..68eb4fb4d3a8 100644 --- a/drivers/gpu/drm/panthor/panthor_fw.c +++ b/drivers/gpu/drm/panthor/panthor_fw.c @@ -12,6 +12,7 @@ #include <linux/iosys-map.h> #include <linux/mutex.h> #include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <drm/drm_drv.h> #include <drm/drm_managed.h> @@ -91,26 +92,26 @@ enum panthor_fw_binary_entry_type { #define CSF_FW_BINARY_ENTRY_UPDATE BIT(30) #define CSF_FW_BINARY_ENTRY_OPTIONAL BIT(31) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_RD BIT(0) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_WR BIT(1) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_EX BIT(2) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_NONE (0 << 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED (1 << 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_UNCACHED_COHERENT (2 << 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED_COHERENT (3 << 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK GENMASK(4, 3) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_PROT BIT(5) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED BIT(30) -#define CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO BIT(31) - -#define CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS \ - (CSF_FW_BINARY_IFACE_ENTRY_RD_RD | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_WR | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_EX | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_PROT | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED | \ - CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) +#define CSF_FW_BINARY_IFACE_ENTRY_RD BIT(0) +#define CSF_FW_BINARY_IFACE_ENTRY_WR BIT(1) +#define CSF_FW_BINARY_IFACE_ENTRY_EX BIT(2) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_NONE (0 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED (1 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_UNCACHED_COHERENT (2 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED_COHERENT (3 << 3) +#define CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK GENMASK(4, 3) +#define CSF_FW_BINARY_IFACE_ENTRY_PROT BIT(5) +#define CSF_FW_BINARY_IFACE_ENTRY_SHARED BIT(30) +#define 
CSF_FW_BINARY_IFACE_ENTRY_ZERO BIT(31) + +#define CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS \ + (CSF_FW_BINARY_IFACE_ENTRY_RD | \ + CSF_FW_BINARY_IFACE_ENTRY_WR | \ + CSF_FW_BINARY_IFACE_ENTRY_EX | \ + CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK | \ + CSF_FW_BINARY_IFACE_ENTRY_PROT | \ + CSF_FW_BINARY_IFACE_ENTRY_SHARED | \ + CSF_FW_BINARY_IFACE_ENTRY_ZERO) /** * struct panthor_fw_binary_section_entry_hdr - Describes a section of FW binary @@ -262,17 +263,6 @@ struct panthor_fw { /** @booted: True is the FW is booted */ bool booted; - /** - * @fast_reset: True if the post_reset logic can proceed with a fast reset. - * - * A fast reset is just a reset where the driver doesn't reload the FW sections. - * - * Any time the firmware is properly suspended, a fast reset can take place. - * On the other hand, if the halt operation failed, the driver will reload - * all sections to make sure we start from a fresh state. - */ - bool fast_reset; - /** @irq: Job irq data. */ struct panthor_irq irq; }; @@ -413,7 +403,7 @@ static void panthor_fw_init_section_mem(struct panthor_device *ptdev, int ret; if (!section->data.size && - !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO)) + !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO)) return; ret = panthor_kernel_bo_vmap(section->mem); @@ -421,7 +411,7 @@ static void panthor_fw_init_section_mem(struct panthor_device *ptdev, return; memcpy(section->mem->kmap, section->data.buf, section->data.size); - if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_ZERO) { + if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_ZERO) { memset(section->mem->kmap + section->data.size, 0, panthor_kernel_bo_size(section->mem) - section->data.size); } @@ -535,20 +525,20 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev, return -EINVAL; } - if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_RD_SUPPORTED_FLAGS) { + if (hdr.flags & ~CSF_FW_BINARY_IFACE_ENTRY_SUPPORTED_FLAGS) { drm_err(&ptdev->base, "Firmware contains interface with unsupported flags (0x%x)\n", hdr.flags); return -EINVAL; } - if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_PROT) { + if (hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_PROT) { drm_warn(&ptdev->base, "Firmware protected mode entry not be supported, ignoring"); return 0; } if (hdr.va.start == CSF_MCU_SHARED_REGION_START && - !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED)) { + !(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED)) { drm_err(&ptdev->base, "Interface at 0x%llx must be shared", CSF_MCU_SHARED_REGION_START); return -EINVAL; @@ -587,26 +577,26 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev, section_size = hdr.va.end - hdr.va.start; if (section_size) { - u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_MASK; + u32 cache_mode = hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_MASK; struct panthor_gem_object *bo; u32 vm_map_flags = 0; struct sg_table *sgt; u64 va = hdr.va.start; - if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) + if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_WR)) vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_READONLY; - if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_RD_EX)) + if (!(hdr.flags & CSF_FW_BINARY_IFACE_ENTRY_EX)) vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC; - /* TODO: CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_*_COHERENT are mapped to + /* TODO: CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_*_COHERENT are mapped to * non-cacheable for now. 
We might want to introduce a new * IOMMU_xxx flag (or abuse IOMMU_MMIO, which maps to device * memory and is currently not used by our driver) for * AS_MEMATTR_AARCH64_SHARED memory, so we can take benefit * of IO-coherent systems. */ - if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_RD_CACHE_MODE_CACHED) + if (cache_mode != CSF_FW_BINARY_IFACE_ENTRY_CACHE_MODE_CACHED) vm_map_flags |= DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED; section->mem = panthor_kernel_bo_create(ptdev, panthor_fw_vm(ptdev), @@ -619,7 +609,7 @@ static int panthor_fw_load_section_entry(struct panthor_device *ptdev, if (drm_WARN_ON(&ptdev->base, section->mem->va_node.start != hdr.va.start)) return -EINVAL; - if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_SHARED) { + if (section->flags & CSF_FW_BINARY_IFACE_ENTRY_SHARED) { ret = panthor_kernel_bo_vmap(section->mem); if (ret) return ret; @@ -689,7 +679,7 @@ panthor_reload_fw_sections(struct panthor_device *ptdev, bool full_reload) list_for_each_entry(section, &ptdev->fw->sections, node) { struct sg_table *sgt; - if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_RD_WR)) + if (!full_reload && !(section->flags & CSF_FW_BINARY_IFACE_ENTRY_WR)) continue; panthor_fw_init_section_mem(ptdev, section); @@ -1089,7 +1079,7 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) /* Make sure we won't be woken up by a ping. */ cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); - ptdev->fw->fast_reset = false; + ptdev->reset.fast = false; if (!on_hang) { struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); @@ -1098,17 +1088,11 @@ void panthor_fw_pre_reset(struct panthor_device *ptdev, bool on_hang) panthor_fw_update_reqs(glb_iface, req, GLB_HALT, GLB_HALT); gpu_write(ptdev, CSF_DOORBELL(CSF_GLB_DOORBELL_ID), 1); if (!readl_poll_timeout(ptdev->iomem + MCU_STATUS, status, - status == MCU_STATUS_HALT, 10, 100000) && - glb_iface->output->halt_status == PANTHOR_FW_HALT_OK) { - ptdev->fw->fast_reset = true; + status == MCU_STATUS_HALT, 10, 100000)) { + ptdev->reset.fast = true; } else { drm_warn(&ptdev->base, "Failed to cleanly suspend MCU"); } - - /* The FW detects 0 -> 1 transitions. Make sure we reset - * the HALT bit before the FW is rebooted. - */ - panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); } panthor_job_irq_suspend(&ptdev->fw->irq); @@ -1130,41 +1114,30 @@ int panthor_fw_post_reset(struct panthor_device *ptdev) if (ret) return ret; - /* If this is a fast reset, try to start the MCU without reloading - * the FW sections. If it fails, go for a full reset. - */ - if (ptdev->fw->fast_reset) { - ret = panthor_fw_start(ptdev); - if (!ret) - goto out; - - /* Forcibly reset the MCU and force a slow reset, so we get a - * fresh boot on the next panthor_fw_start() call. + if (!ptdev->reset.fast) { + /* On a slow reset, reload all sections, including RO ones. + * We're not supposed to end up here anyway, let's just assume + * the overhead of reloading everything is acceptable. */ - panthor_fw_stop(ptdev); - ptdev->fw->fast_reset = false; - drm_err(&ptdev->base, "FW fast reset failed, trying a slow reset"); + panthor_reload_fw_sections(ptdev, true); + } else { + /* The FW detects 0 -> 1 transitions. Make sure we reset + * the HALT bit before the FW is rebooted. + * This is not needed on a slow reset because FW sections are + * re-initialized. 
+ */ + struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); - ret = panthor_vm_flush_all(ptdev->fw->vm); - if (ret) { - drm_err(&ptdev->base, "FW slow reset failed (couldn't flush FW's AS l2cache)"); - return ret; - } + panthor_fw_update_reqs(glb_iface, req, 0, GLB_HALT); } - /* Reload all sections, including RO ones. We're not supposed - * to end up here anyway, let's just assume the overhead of - * reloading everything is acceptable. - */ - panthor_reload_fw_sections(ptdev, true); - ret = panthor_fw_start(ptdev); if (ret) { - drm_err(&ptdev->base, "FW slow reset failed (couldn't start the FW )"); + drm_err(&ptdev->base, "FW %s reset failed", + ptdev->reset.fast ? "fast" : "slow"); return ret; } -out: /* We must re-initialize the global interface even on fast-reset. */ panthor_fw_init_global_iface(ptdev); return 0; @@ -1188,11 +1161,13 @@ void panthor_fw_unplug(struct panthor_device *ptdev) cancel_delayed_work_sync(&ptdev->fw->watchdog.ping_work); - /* Make sure the IRQ handler can be called after that point. */ - if (ptdev->fw->irq.irq) - panthor_job_irq_suspend(&ptdev->fw->irq); + if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) { + /* Make sure the IRQ handler cannot be called after that point. */ + if (ptdev->fw->irq.irq) + panthor_job_irq_suspend(&ptdev->fw->irq); - panthor_fw_stop(ptdev); + panthor_fw_stop(ptdev); + } list_for_each_entry(section, &ptdev->fw->sections, node) panthor_kernel_bo_destroy(section->mem); @@ -1205,7 +1180,8 @@ void panthor_fw_unplug(struct panthor_device *ptdev) panthor_vm_put(ptdev->fw->vm); ptdev->fw->vm = NULL; - panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); + if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) + panthor_gpu_power_off(ptdev, L2, ptdev->gpu_info.l2_present, 20000); } /** diff --git a/drivers/gpu/drm/panthor/panthor_gpu.c b/drivers/gpu/drm/panthor/panthor_gpu.c index 2d3529a0b156..671049020afa 100644 --- a/drivers/gpu/drm/panthor/panthor_gpu.c +++ b/drivers/gpu/drm/panthor/panthor_gpu.c @@ -77,6 +77,12 @@ static const struct panthor_model gpu_models[] = { GPU_IRQ_RESET_COMPLETED | \ GPU_IRQ_CLEAN_CACHES_COMPLETED) +static void panthor_gpu_coherency_set(struct panthor_device *ptdev) +{ + gpu_write(ptdev, GPU_COHERENCY_PROTOCOL, + ptdev->coherent ? GPU_COHERENCY_PROT_BIT(ACE_LITE) : GPU_COHERENCY_NONE); +} + static void panthor_gpu_init_info(struct panthor_device *ptdev) { const struct panthor_model *model; @@ -174,7 +180,8 @@ void panthor_gpu_unplug(struct panthor_device *ptdev) unsigned long flags; /* Make sure the IRQ handler is not running after that point. */ - panthor_gpu_irq_suspend(&ptdev->gpu->irq); + if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) + panthor_gpu_irq_suspend(&ptdev->gpu->irq); /* Wake-up all waiters. */ spin_lock_irqsave(&ptdev->gpu->reqs_lock, flags); @@ -365,6 +372,9 @@ int panthor_gpu_l2_power_on(struct panthor_device *ptdev) hweight64(ptdev->gpu_info.shader_present)); } + /* Set the desired coherency mode before the power up of L2 */ + panthor_gpu_coherency_set(ptdev); + return panthor_gpu_power_on(ptdev, L2, 1, 20000); } @@ -460,11 +470,12 @@ int panthor_gpu_soft_reset(struct panthor_device *ptdev) */ void panthor_gpu_suspend(struct panthor_device *ptdev) { - /* - * It may be preferable to simply power down the L2, but for now just - * soft-reset which will leave the L2 powered down. - */ - panthor_gpu_soft_reset(ptdev); + /* On a fast reset, simply power down the L2. 
*/ + if (!ptdev->reset.fast) + panthor_gpu_soft_reset(ptdev); + else + panthor_gpu_power_off(ptdev, L2, 1, 20000); + panthor_gpu_irq_suspend(&ptdev->gpu->irq); } diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index a49132f3778b..c39e3eb1c15d 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -1941,7 +1941,7 @@ struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool c return pool; } -static u64 mair_to_memattr(u64 mair) +static u64 mair_to_memattr(u64 mair, bool coherent) { u64 memattr = 0; u32 i; @@ -1960,14 +1960,21 @@ static u64 mair_to_memattr(u64 mair) AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); } else { - /* Use SH_CPU_INNER mode so SH_IS, which is used when - * IOMMU_CACHE is set, actually maps to the standard - * definition of inner-shareable and not Mali's - * internal-shareable mode. - */ out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | - AS_MEMATTR_AARCH64_SH_CPU_INNER | AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); + /* Use SH_MIDGARD_INNER mode when device isn't coherent, + * so SH_IS, which is used when IOMMU_CACHE is set, maps + * to Mali's internal-shareable mode. As per the Mali + * Spec, inner and outer-shareable modes aren't allowed + * for WB memory when coherency is disabled. + * Use SH_CPU_INNER mode when coherency is enabled, so + * that SH_IS actually maps to the standard definition of + * inner-shareable. + */ + if (!coherent) + out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER; + else + out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER; } memattr |= (u64)out_attr << (8 * i); @@ -2339,7 +2346,7 @@ panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, goto err_sched_fini; mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair; - vm->memattr = mair_to_memattr(mair); + vm->memattr = mair_to_memattr(mair, ptdev->coherent); mutex_lock(&ptdev->mmu->vm.lock); list_add_tail(&vm->node, &ptdev->mmu->vm.list); @@ -2665,7 +2672,8 @@ int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm */ void panthor_mmu_unplug(struct panthor_device *ptdev) { - panthor_mmu_irq_suspend(&ptdev->mmu->irq); + if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev)) + panthor_mmu_irq_suspend(&ptdev->mmu->irq); mutex_lock(&ptdev->mmu->as.slots_lock); for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index ef4bec7ff9c7..77b184c3fb0c 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -611,6 +611,16 @@ struct panthor_group { bool timedout; /** + * @innocent: True when the group becomes unusable because the group suspension + * failed during a reset. + * + * Sometimes the FW was put in a bad state by other groups, causing the group + * suspension happening in the reset path to fail. In that case, we consider the + * group innocent. + */ + bool innocent; + + /** * @syncobjs: Pool of per-queue synchronization objects. * * One sync object per queue. 
The position of the sync object is @@ -2354,7 +2364,7 @@ static void tick_work(struct work_struct *work) if (!drm_dev_enter(&ptdev->base, &cookie)) return; - ret = pm_runtime_resume_and_get(ptdev->base.dev); + ret = panthor_device_resume_and_get(ptdev); if (drm_WARN_ON(&ptdev->base, ret)) goto out_dev_exit; @@ -2690,6 +2700,12 @@ void panthor_sched_suspend(struct panthor_device *ptdev) u32 csg_id = ffs(slot_mask) - 1; struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; + /* If the group was still usable before that point, we consider + * it innocent. + */ + if (group_can_run(csg_slot->group)) + csg_slot->group->innocent = true; + /* We consider group suspension failures as fatal and flag the * group as unusable by setting timedout=true. */ @@ -3115,7 +3131,7 @@ queue_run_job(struct drm_sched_job *sched_job) return dma_fence_get(job->done_fence); } - ret = pm_runtime_resume_and_get(ptdev->base.dev); + ret = panthor_device_resume_and_get(ptdev); if (drm_WARN_ON(&ptdev->base, ret)) return ERR_PTR(ret); @@ -3570,6 +3586,8 @@ int panthor_group_get_state(struct panthor_file *pfile, get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT; get_state->fatal_queues = group->fatal_queues; } + if (group->innocent) + get_state->state |= DRM_PANTHOR_GROUP_STATE_INNOCENT; mutex_unlock(&sched->lock); group_put(group); diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c index 13362150b9c6..56ff6a3fb483 100644 --- a/drivers/gpu/drm/pl111/pl111_drv.c +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -45,9 +45,9 @@ #include <linux/shmem_fs.h> #include <linux/slab.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_fourcc.h> @@ -220,7 +220,6 @@ static const struct drm_driver pl111_drm_driver = { .fops = &drm_fops, .name = "pl111", .desc = DRIVER_DESC, - .date = "20170317", .major = 1, .minor = 0, .patchlevel = 0, diff --git a/drivers/gpu/drm/qxl/Kconfig b/drivers/gpu/drm/qxl/Kconfig index 98a148bea628..69427eb8bed2 100644 --- a/drivers/gpu/drm/qxl/Kconfig +++ b/drivers/gpu/drm/qxl/Kconfig @@ -6,6 +6,7 @@ config DRM_QXL select DRM_KMS_HELPER select DRM_TTM select DRM_TTM_HELPER + select DRM_EXEC select CRC32 help QXL virtual GPU for Spice virtualization desktop integration. 
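The DRM_EXEC select added here goes with the qxl and radeon hunks that follow, which drop the old ttm_execbuf_util reservation lists. Condensed to its essentials, the new locking pattern looks like the sketch below; the list entry type mirrors the reworked struct qxl_bo_list, and the wrapper function is illustrative only:

	#include <drm/drm_exec.h>

	/* Sketch: lock every BO on a release/CS list with drm_exec. */
	static int lock_all_bos(struct drm_exec *exec, struct list_head *bos)
	{
		struct qxl_bo_list *entry;
		int ret;

		drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
		drm_exec_until_all_locked(exec) {
			list_for_each_entry(entry, bos, list) {
				/* Lock the GEM object and reserve one fence slot. */
				ret = drm_exec_prepare_obj(exec, &entry->bo->tbo.base, 1);
				/* On ww-mutex contention, drop every lock taken so
				 * far and restart the outer loop from scratch. */
				drm_exec_retry_on_contention(exec);
				if (ret)
					goto err_fini;
			}
		}
		return 0;

	err_fini:
		drm_exec_fini(exec);
		return ret;
	}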
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index 21f752644242..417061ae59eb 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -34,9 +34,9 @@ #include <linux/pci.h> #include <linux/vgaarb.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_file.h> @@ -300,7 +300,6 @@ static struct drm_driver qxl_driver = { .num_ioctls = ARRAY_SIZE(qxl_ioctls), .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = 0, .minor = 1, .patchlevel = 0, diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 32069acd93f8..cc02b5f10ad9 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -38,12 +38,12 @@ #include <drm/drm_crtc.h> #include <drm/drm_encoder.h> +#include <drm/drm_exec.h> #include <drm/drm_gem_ttm_helper.h> #include <drm/drm_ioctl.h> #include <drm/drm_gem.h> #include <drm/qxl_drm.h> #include <drm/ttm/ttm_bo.h> -#include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_placement.h> #include "qxl_dev.h" @@ -54,7 +54,6 @@ struct iosys_map; #define DRIVER_NAME "qxl" #define DRIVER_DESC "RH QXL" -#define DRIVER_DATE "20120117" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 1 @@ -101,7 +100,8 @@ struct qxl_gem { }; struct qxl_bo_list { - struct ttm_validate_buffer tv; + struct qxl_bo *bo; + struct list_head list; }; struct qxl_crtc { @@ -150,7 +150,7 @@ struct qxl_release { struct qxl_bo *release_bo; uint32_t release_offset; uint32_t surface_release_id; - struct ww_acquire_ctx ticket; + struct drm_exec exec; struct list_head bos; }; diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 368d26da0d6a..05204a6a3fa8 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -121,13 +121,11 @@ qxl_release_free_list(struct qxl_release *release) { while (!list_empty(&release->bos)) { struct qxl_bo_list *entry; - struct qxl_bo *bo; entry = container_of(release->bos.next, - struct qxl_bo_list, tv.head); - bo = to_qxl_bo(entry->tv.bo); - qxl_bo_unref(&bo); - list_del(&entry->tv.head); + struct qxl_bo_list, list); + qxl_bo_unref(&entry->bo); + list_del(&entry->list); kfree(entry); } release->release_bo = NULL; @@ -172,8 +170,8 @@ int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo) { struct qxl_bo_list *entry; - list_for_each_entry(entry, &release->bos, tv.head) { - if (entry->tv.bo == &bo->tbo) + list_for_each_entry(entry, &release->bos, list) { + if (entry->bo == bo) return 0; } @@ -182,9 +180,8 @@ int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo) return -ENOMEM; qxl_bo_ref(bo); - entry->tv.bo = &bo->tbo; - entry->tv.num_shared = 0; - list_add_tail(&entry->tv.head, &release->bos); + entry->bo = bo; + list_add_tail(&entry->list, &release->bos); return 0; } @@ -221,21 +218,28 @@ int qxl_release_reserve_list(struct qxl_release *release, bool no_intr) if (list_is_singular(&release->bos)) return 0; - ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos, - !no_intr, NULL); - if (ret) - return ret; - - list_for_each_entry(entry, &release->bos, tv.head) { - struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); - - ret = qxl_release_validate_bo(bo); - if (ret) { - ttm_eu_backoff_reservation(&release->ticket, &release->bos); - return ret; + drm_exec_init(&release->exec, no_intr ? 
0 : + DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&release->exec) { + list_for_each_entry(entry, &release->bos, list) { + ret = drm_exec_prepare_obj(&release->exec, + &entry->bo->tbo.base, + 1); + drm_exec_retry_on_contention(&release->exec); + if (ret) + goto error; } } + + list_for_each_entry(entry, &release->bos, list) { + ret = qxl_release_validate_bo(entry->bo); + if (ret) + goto error; + } return 0; +error: + drm_exec_fini(&release->exec); + return ret; } void qxl_release_backoff_reserve_list(struct qxl_release *release) @@ -245,7 +249,7 @@ void qxl_release_backoff_reserve_list(struct qxl_release *release) if (list_is_singular(&release->bos)) return; - ttm_eu_backoff_reservation(&release->ticket, &release->bos); + drm_exec_fini(&release->exec); } int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, @@ -404,18 +408,18 @@ void qxl_release_unmap(struct qxl_device *qdev, void qxl_release_fence_buffer_objects(struct qxl_release *release) { - struct ttm_buffer_object *bo; struct ttm_device *bdev; - struct ttm_validate_buffer *entry; + struct qxl_bo_list *entry; struct qxl_device *qdev; + struct qxl_bo *bo; /* if only one object on the release its the release itself since these objects are pinned no need to reserve */ if (list_is_singular(&release->bos) || list_empty(&release->bos)) return; - bo = list_first_entry(&release->bos, struct ttm_validate_buffer, head)->bo; - bdev = bo->bdev; + bo = list_first_entry(&release->bos, struct qxl_bo_list, list)->bo; + bdev = bo->tbo.bdev; qdev = container_of(bdev, struct qxl_device, mman.bdev); /* @@ -426,14 +430,12 @@ void qxl_release_fence_buffer_objects(struct qxl_release *release) release->id | 0xf0000000, release->base.seqno); trace_dma_fence_emit(&release->base); - list_for_each_entry(entry, &release->bos, head) { + list_for_each_entry(entry, &release->bos, list) { bo = entry->bo; - dma_resv_add_fence(bo->base.resv, &release->base, + dma_resv_add_fence(bo->tbo.base.resv, &release->base, DMA_RESV_USAGE_READ); - ttm_bo_move_to_lru_tail_unlocked(bo); - dma_resv_unlock(bo->base.resv); + ttm_bo_move_to_lru_tail_unlocked(&bo->tbo); } - ww_acquire_fini(&release->ticket); + drm_exec_fini(&release->exec); } - diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index 9c6c74a75778..f51bace9555d 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -13,6 +13,7 @@ config DRM_RADEON select DRM_TTM select DRM_TTM_HELPER select FB_IOMEM_HELPERS if DRM_FBDEV_EMULATION + select DRM_EXEC select SND_HDA_COMPONENT if SND_HDA_CORE select POWER_SUPPLY select HWMON diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index fd8a4513025f..8605c074d9f7 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -75,8 +75,8 @@ #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_placement.h> -#include <drm/ttm/ttm_execbuf_util.h> +#include <drm/drm_exec.h> #include <drm/drm_gem.h> #include <drm/drm_audio_component.h> #include <drm/drm_suballoc.h> @@ -457,7 +457,8 @@ struct radeon_mman { struct radeon_bo_list { struct radeon_bo *robj; - struct ttm_validate_buffer tv; + struct list_head list; + bool shared; uint64_t gpu_offset; unsigned preferred_domains; unsigned allowed_domains; @@ -1030,6 +1031,7 @@ struct radeon_cs_parser { struct radeon_bo_list *vm_bos; struct list_head validated; unsigned dma_reloc_idx; + struct drm_exec exec; /* indices of various chunks */ struct radeon_cs_chunk *chunk_ib; struct radeon_cs_chunk *chunk_relocs; @@ -1043,7 
+1045,6 @@ struct radeon_cs_parser { u32 cs_flags; u32 ring; s32 priority; - struct ww_acquire_ctx ticket; }; static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx) diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c index 5b69cc8011b4..8d64ba18572e 100644 --- a/drivers/gpu/drm/radeon/radeon_audio.c +++ b/drivers/gpu/drm/radeon/radeon_audio.c @@ -775,8 +775,10 @@ static int radeon_audio_component_get_eld(struct device *kdev, int port, if (!dig->pin || dig->pin->id != port) continue; *enabled = true; + mutex_lock(&connector->eld_mutex); ret = drm_eld_size(connector->eld); memcpy(buf, connector->eld, min(max_bytes, ret)); + mutex_unlock(&connector->eld_mutex); break; } diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index a6700d7278bf..64b26bfeafc9 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -182,11 +182,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) } } - p->relocs[i].tv.bo = &p->relocs[i].robj->tbo; - p->relocs[i].tv.num_shared = !r->write_domain; - - radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head, - priority); + p->relocs[i].shared = !r->write_domain; + radeon_cs_buckets_add(&buckets, &p->relocs[i].list, priority); } radeon_cs_buckets_get_list(&buckets, &p->validated); @@ -197,7 +194,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (need_mmap_lock) mmap_read_lock(current->mm); - r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring); + r = radeon_bo_list_validate(p->rdev, &p->exec, &p->validated, p->ring); if (need_mmap_lock) mmap_read_unlock(current->mm); @@ -253,12 +250,11 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p) struct radeon_bo_list *reloc; int r; - list_for_each_entry(reloc, &p->validated, tv.head) { + list_for_each_entry(reloc, &p->validated, list) { struct dma_resv *resv; resv = reloc->robj->tbo.base.resv; - r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, - reloc->tv.num_shared); + r = radeon_sync_resv(p->rdev, &p->ib.sync, resv, reloc->shared); if (r) return r; } @@ -276,6 +272,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) s32 priority = 0; INIT_LIST_HEAD(&p->validated); + drm_exec_init(&p->exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); if (!cs->num_chunks) { return 0; @@ -397,8 +394,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) static int cmp_size_smaller_first(void *priv, const struct list_head *a, const struct list_head *b) { - struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head); - struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head); + struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, list); + struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, list); /* Sort A before B if A is smaller. */ if (la->robj->tbo.base.size > lb->robj->tbo.base.size) @@ -417,11 +414,13 @@ static int cmp_size_smaller_first(void *priv, const struct list_head *a, * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. **/ -static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bool backoff) +static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i; if (!error) { + struct radeon_bo_list *reloc; + /* Sort the buffer list from the smallest to largest buffer, * which affects the order of buffers in the LRU list. 
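With ttm_eu_fence_buffer_objects() gone, radeon_cs_parser_fini() (continued in the next hunk) attaches the IB fence to each validated BO itself. The rule it applies is small enough to restate on its own; the wrapper name below is illustrative, not a driver helper:

	static void radeon_cs_publish_fence(struct radeon_bo_list *reloc,
					    struct dma_fence *fence)
	{
		/* Read-only relocations publish the fence as a READ fence so
		 * they do not serialize against other readers; written BOs
		 * publish it as a WRITE fence. */
		dma_resv_add_fence(reloc->robj->tbo.base.resv, fence,
				   reloc->shared ? DMA_RESV_USAGE_READ
						 : DMA_RESV_USAGE_WRITE);
	}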
* This assures that the smallest buffers are added first @@ -433,15 +432,17 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo * per frame under memory pressure. */ list_sort(NULL, &parser->validated, cmp_size_smaller_first); - - ttm_eu_fence_buffer_objects(&parser->ticket, - &parser->validated, - &parser->ib.fence->base); - } else if (backoff) { - ttm_eu_backoff_reservation(&parser->ticket, - &parser->validated); + list_for_each_entry(reloc, &parser->validated, list) { + dma_resv_add_fence(reloc->robj->tbo.base.resv, + &parser->ib.fence->base, + reloc->shared ? + DMA_RESV_USAGE_READ : + DMA_RESV_USAGE_WRITE); + } } + drm_exec_fini(&parser->exec); + if (parser->relocs != NULL) { for (i = 0; i < parser->nrelocs; i++) { struct radeon_bo *bo = parser->relocs[i].robj; @@ -693,7 +694,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = radeon_cs_parser_init(&parser, data); if (r) { DRM_ERROR("Failed to initialize parser !\n"); - radeon_cs_parser_fini(&parser, r, false); + radeon_cs_parser_fini(&parser, r); up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); return r; @@ -707,7 +708,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } if (r) { - radeon_cs_parser_fini(&parser, r, false); + radeon_cs_parser_fini(&parser, r); up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); return r; @@ -724,7 +725,7 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } out: - radeon_cs_parser_fini(&parser, r, true); + radeon_cs_parser_fini(&parser, r); up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); return r; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 5e958cc223f4..267f082bc430 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -37,7 +37,7 @@ #include <linux/mmu_notifier.h> #include <linux/pci.h> -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_fourcc.h> @@ -603,7 +603,6 @@ static const struct drm_driver kms_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = KMS_DRIVER_MAJOR, .minor = KMS_DRIVER_MINOR, .patchlevel = KMS_DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 02a65971d140..0f3dbffc492d 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -43,7 +43,6 @@ #define DRIVER_NAME "radeon" #define DRIVER_DESC "ATI Radeon" -#define DRIVER_DATE "20080528" /* Interface history: * diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index bf2d4b16dc2a..f86773f3db20 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -605,33 +605,40 @@ out: static void radeon_gem_va_update_vm(struct radeon_device *rdev, struct radeon_bo_va *bo_va) { - struct ttm_validate_buffer tv, *entry; - struct radeon_bo_list *vm_bos; - struct ww_acquire_ctx ticket; + struct radeon_bo_list *vm_bos, *entry; struct list_head list; + struct drm_exec exec; unsigned domain; int r; INIT_LIST_HEAD(&list); - tv.bo = &bo_va->bo->tbo; - tv.num_shared = 1; - list_add(&tv.head, &list); - vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list); if (!vm_bos) return; - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) - goto error_free; + 
drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0); + drm_exec_until_all_locked(&exec) { + list_for_each_entry(entry, &list, list) { + r = drm_exec_prepare_obj(&exec, &entry->robj->tbo.base, + 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_cleanup; + } - list_for_each_entry(entry, &list, head) { - domain = radeon_mem_type_to_domain(entry->bo->resource->mem_type); + r = drm_exec_prepare_obj(&exec, &bo_va->bo->tbo.base, 1); + drm_exec_retry_on_contention(&exec); + if (unlikely(r)) + goto error_cleanup; + } + + list_for_each_entry(entry, &list, list) { + domain = radeon_mem_type_to_domain(entry->robj->tbo.resource->mem_type); /* if anything is swapped out don't swap it in here, just abort and wait for the next CS */ if (domain == RADEON_GEM_DOMAIN_CPU) - goto error_unreserve; + goto error_cleanup; } mutex_lock(&bo_va->vm->mutex); @@ -645,10 +652,8 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev, error_unlock: mutex_unlock(&bo_va->vm->mutex); -error_unreserve: - ttm_eu_backoff_reservation(&ticket, &list); - -error_free: +error_cleanup: + drm_exec_fini(&exec); kvfree(vm_bos); if (r && r != -ERESTARTSYS) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 7672404fdb29..a0fc0801abb0 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -464,23 +464,26 @@ static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev) } int radeon_bo_list_validate(struct radeon_device *rdev, - struct ww_acquire_ctx *ticket, + struct drm_exec *exec, struct list_head *head, int ring) { struct ttm_operation_ctx ctx = { true, false }; struct radeon_bo_list *lobj; - struct list_head duplicates; - int r; u64 bytes_moved = 0, initial_bytes_moved; u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); + int r; - INIT_LIST_HEAD(&duplicates); - r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates); - if (unlikely(r != 0)) { - return r; + drm_exec_until_all_locked(exec) { + list_for_each_entry(lobj, head, list) { + r = drm_exec_prepare_obj(exec, &lobj->robj->tbo.base, + 1); + drm_exec_retry_on_contention(exec); + if (unlikely(r && r != -EALREADY)) + return r; + } } - list_for_each_entry(lobj, head, tv.head) { + list_for_each_entry(lobj, head, list) { struct radeon_bo *bo = lobj->robj; if (!bo->tbo.pin_count) { u32 domain = lobj->preferred_domains; @@ -519,7 +522,6 @@ int radeon_bo_list_validate(struct radeon_device *rdev, domain = lobj->allowed_domains; goto retry; } - ttm_eu_backoff_reservation(ticket, head); return r; } } @@ -527,11 +529,6 @@ int radeon_bo_list_validate(struct radeon_device *rdev, lobj->tiling_flags = bo->tiling_flags; } - list_for_each_entry(lobj, &duplicates, tv.head) { - lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj); - lobj->tiling_flags = lobj->robj->tiling_flags; - } - return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index 39cc87a59a9a..d7bbb52db546 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -152,7 +152,7 @@ extern void radeon_bo_force_delete(struct radeon_device *rdev); extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); extern int radeon_bo_list_validate(struct radeon_device *rdev, - struct ww_acquire_ctx *ticket, + struct drm_exec *exec, struct list_head *head, int ring); extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo, u32 tiling_flags, u32 
pitch); diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index c38b4d5d6a14..21a5340aefdf 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -142,10 +142,9 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, list[0].robj = vm->page_directory; list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM; list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; - list[0].tv.bo = &vm->page_directory->tbo; - list[0].tv.num_shared = 1; + list[0].shared = true; list[0].tiling_flags = 0; - list_add(&list[0].tv.head, head); + list_add(&list[0].list, head); for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { if (!vm->page_tables[i].bo) @@ -154,10 +153,9 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, list[idx].robj = vm->page_tables[i].bo; list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM; list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; - list[idx].tv.bo = &list[idx].robj->tbo; - list[idx].tv.num_shared = 1; + list[idx].shared = true; list[idx].tiling_flags = 0; - list_add(&list[idx++].tv.head, head); + list_add(&list[idx++].list, head); } return list; diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.c index f9ecc334c024..d948ff3594c4 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_du_drv.c @@ -18,8 +18,8 @@ #include <linux/slab.h> #include <linux/wait.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -546,6 +546,23 @@ static const struct rcar_du_device_info rcar_du_r8a779g0_info = { .dsi_clk_mask = BIT(1) | BIT(0), }; +static const struct rcar_du_device_info rcar_du_r8a779h0_info = { + .gen = 4, + .features = RCAR_DU_FEATURE_CRTC_IRQ + | RCAR_DU_FEATURE_VSP1_SOURCE + | RCAR_DU_FEATURE_NO_BLENDING, + .channels_mask = BIT(0), + .routes = { + /* R8A779H0 has one MIPI DSI output. 
*/ + [RCAR_DU_OUTPUT_DSI0] = { + .possible_crtcs = BIT(0), + .port = 0, + }, + }, + .num_rpf = 5, + .dsi_clk_mask = BIT(0), +}; + static const struct of_device_id rcar_du_of_table[] = { { .compatible = "renesas,du-r8a7742", .data = &rcar_du_r8a7790_info }, { .compatible = "renesas,du-r8a7743", .data = &rzg1_du_r8a7743_info }, @@ -572,6 +589,7 @@ static const struct of_device_id rcar_du_of_table[] = { { .compatible = "renesas,du-r8a77995", .data = &rcar_du_r8a7799x_info }, { .compatible = "renesas,du-r8a779a0", .data = &rcar_du_r8a779a0_info }, { .compatible = "renesas,du-r8a779g0", .data = &rcar_du_r8a779g0_info }, + { .compatible = "renesas,du-r8a779h0", .data = &rcar_du_r8a779h0_info }, { } }; @@ -611,7 +629,6 @@ static const struct drm_driver rcar_du_driver = { .fops = &rcar_du_fops, .name = "rcar-du", .desc = "Renesas R-Car Display Unit", - .date = "20130110", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_du_group.c b/drivers/gpu/drm/renesas/rcar-du/rcar_du_group.c index 2ccd2581f544..068c106e586c 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_du_group.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_du_group.c @@ -107,10 +107,12 @@ static void rcar_du_group_setup_didsr(struct rcar_du_group *rgrp) */ rcrtc = rcdu->crtcs; num_crtcs = rcdu->num_crtcs; - } else if (rcdu->info->gen >= 3 && rgrp->num_crtcs > 1) { + } else if ((rcdu->info->gen == 3 && rgrp->num_crtcs > 1) || + rcdu->info->gen == 4) { /* * On Gen3 dot clocks are setup through per-group registers, * only available when the group has two channels. + * On Gen4 the registers are there for single channel too. */ rcrtc = &rcdu->crtcs[rgrp->index * 2]; num_crtcs = rgrp->num_crtcs; @@ -185,11 +187,21 @@ static void rcar_du_group_setup(struct rcar_du_group *rgrp) dorcr |= DORCR_PG1T | DORCR_DK1S | DORCR_PG1D_DS1; rcar_du_group_write(rgrp, DORCR, dorcr); - /* Apply planes to CRTCs association. */ - mutex_lock(&rgrp->lock); - rcar_du_group_write(rgrp, DPTSR, (rgrp->dptsr_planes << 16) | - rgrp->dptsr_planes); - mutex_unlock(&rgrp->lock); + /* + * DPTSR is used to select the source for the planes of a group. The + * first source is chosen by writing 0 to the respective bits, and this + * is always the default value of the register. In other words, writing + * DPTSR is only needed if the SoC supports choosing the second source. + * + * The SoCs documentations seems to confirm this, as the DPTSR register + * is not documented if only the first source exists on that SoC. 
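Concretely, on the R8A779H0 added earlier in this series the group exposes only channel 0, so channels_mask & BIT(1) is clear and DPTSR is simply left at its reset default, while Gen3 parts with two channels per group keep programming it as before.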
+ */ + if (rgrp->channels_mask & BIT(1)) { + mutex_lock(&rgrp->lock); + rcar_du_group_write(rgrp, DPTSR, (rgrp->dptsr_planes << 16) | + rgrp->dptsr_planes); + mutex_unlock(&rgrp->lock); + } } /* diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c index 8180625d5866..3c0c18d5249a 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi.c @@ -587,7 +587,7 @@ static int rcar_mipi_dsi_startup(struct rcar_mipi_dsi *dsi, for (timeout = 10; timeout > 0; --timeout) { if ((rcar_mipi_dsi_read(dsi, PPICLSR) & PPICLSR_STPST) && (rcar_mipi_dsi_read(dsi, PPIDLSR) & PPIDLSR_STPST) && - (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK)) + (rcar_mipi_dsi_read(dsi, CLOCKSET1) & CLOCKSET1_LOCK_PHY)) break; usleep_range(1000, 2000); @@ -1081,6 +1081,8 @@ static const struct rcar_mipi_dsi_device_info v4h_data = { static const struct of_device_id rcar_mipi_dsi_of_table[] = { { .compatible = "renesas,r8a779a0-dsi-csi2-tx", .data = &v3u_data }, { .compatible = "renesas,r8a779g0-dsi-csi2-tx", .data = &v4h_data }, + /* DSI in r8a779h0 is identical to r8a779g0 */ + { .compatible = "renesas,r8a779h0-dsi-csi2-tx", .data = &v4h_data }, { } }; diff --git a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h index f8114d11f2d1..a6b276f1d6ee 100644 --- a/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h +++ b/drivers/gpu/drm/renesas/rcar-du/rcar_mipi_dsi_regs.h @@ -142,7 +142,6 @@ #define CLOCKSET1 0x101c #define CLOCKSET1_LOCK_PHY (1 << 17) -#define CLOCKSET1_LOCK (1 << 16) #define CLOCKSET1_CLKSEL (1 << 8) #define CLOCKSET1_CLKINSEL_EXTAL (0 << 2) #define CLOCKSET1_CLKINSEL_DIG (1 << 2) diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.c index c4c1474d487e..6e7aac6219be 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_crtc.c @@ -28,7 +28,6 @@ #include "rzg2l_du_vsp.h" #define DU_MCR0 0x00 -#define DU_MCR0_DPI_OE BIT(0) #define DU_MCR0_DI_EN BIT(8) #define DU_DITR0 0x10 @@ -217,14 +216,9 @@ static void rzg2l_du_crtc_put(struct rzg2l_du_crtc *rcrtc) static void rzg2l_du_start_stop(struct rzg2l_du_crtc *rcrtc, bool start) { - struct rzg2l_du_crtc_state *rstate = to_rzg2l_crtc_state(rcrtc->crtc.state); struct rzg2l_du_device *rcdu = rcrtc->dev; - u32 val = DU_MCR0_DI_EN; - if (rstate->outputs & BIT(RZG2L_DU_OUTPUT_DPAD0)) - val |= DU_MCR0_DPI_OE; - - writel(start ? val : 0, rcdu->mmio + DU_MCR0); + writel(start ? 
DU_MCR0_DI_EN : 0, rcdu->mmio + DU_MCR0); } static void rzg2l_du_crtc_start(struct rzg2l_du_crtc *rcrtc) diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.c index b069efd8ffc3..cbd9b9841267 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_drv.c @@ -12,8 +12,8 @@ #include <linux/of.h> #include <linux/platform_device.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -84,7 +84,6 @@ static const struct drm_driver rzg2l_du_driver = { .fops = &rzg2l_du_fops, .name = "rzg2l-du", .desc = "Renesas RZ/G2L Display Unit", - .date = "20230410", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.c index 339cbaaea0b5..564ab4cb3d37 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_encoder.c @@ -10,6 +10,7 @@ #include <linux/export.h> #include <linux/of.h> +#include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> #include <drm/drm_bridge_connector.h> #include <drm/drm_panel.h> @@ -24,6 +25,22 @@ static const struct drm_encoder_funcs rzg2l_du_encoder_funcs = { }; +static enum drm_mode_status +rzg2l_du_encoder_mode_valid(struct drm_encoder *encoder, + const struct drm_display_mode *mode) +{ + struct rzg2l_du_encoder *renc = to_rzg2l_encoder(encoder); + + if (renc->output == RZG2L_DU_OUTPUT_DPAD0 && mode->clock > 83500) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + +static const struct drm_encoder_helper_funcs rzg2l_du_encoder_helper_funcs = { + .mode_valid = rzg2l_du_encoder_mode_valid, +}; + int rzg2l_du_encoder_init(struct rzg2l_du_device *rcdu, enum rzg2l_du_output output, struct device_node *enc_node) @@ -48,6 +65,7 @@ int rzg2l_du_encoder_init(struct rzg2l_du_device *rcdu, return PTR_ERR(renc); renc->output = output; + drm_encoder_helper_add(&renc->base, &rzg2l_du_encoder_helper_funcs); /* Attach the bridge to the encoder. */ ret = drm_bridge_attach(&renc->base, bridge, NULL, diff --git a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c index b99217b4e05d..90c6269ccd29 100644 --- a/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c +++ b/drivers/gpu/drm/renesas/rz-du/rzg2l_du_kms.c @@ -311,11 +311,11 @@ int rzg2l_du_modeset_init(struct rzg2l_du_device *rcdu) dev->mode_config.helper_private = &rzg2l_du_mode_config_helper; /* - * The RZ DU uses the VSP1 for memory access, and is limited - * to frame sizes of 1920x1080. + * The RZ DU was designed to support a frame size of 1920x1200 (landscape) + * or 1200x1920 (portrait). 
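As a worked example of the new rzg2l_du_encoder_mode_valid() limit above: 1080p60 (148.5 MHz pixel clock) is rejected on the DPAD0 (DPI) output because 148500 kHz exceeds the 83500 kHz cap, while 720p60 at 74.25 MHz still passes; the DSI output is not affected by this check.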
*/ dev->mode_config.max_width = 1920; - dev->mode_config.max_height = 1080; + dev->mode_config.max_height = 1920; rcdu->num_crtcs = hweight8(rcdu->info->channels_mask); diff --git a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c index 76ee3e16077c..2f31822b2245 100644 --- a/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c +++ b/drivers/gpu/drm/renesas/shmobile/shmob_drm_drv.c @@ -17,8 +17,8 @@ #include <linux/pm_runtime.h> #include <linux/slab.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_fourcc.h> @@ -107,7 +107,6 @@ static const struct drm_driver shmob_drm_driver = { .fops = &shmob_drm_fops, .name = "shmob-drm", .desc = "Renesas SH Mobile DRM", - .date = "20120424", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig index 3ac579615749..26c4410b2407 100644 --- a/drivers/gpu/drm/rockchip/Kconfig +++ b/drivers/gpu/drm/rockchip/Kconfig @@ -11,6 +11,7 @@ config DRM_ROCKCHIP select DRM_DW_HDMI if ROCKCHIP_DW_HDMI select DRM_DW_HDMI_QP if ROCKCHIP_DW_HDMI_QP select DRM_DW_MIPI_DSI if ROCKCHIP_DW_MIPI_DSI + select DRM_DW_MIPI_DSI2 if ROCKCHIP_DW_MIPI_DSI2 select GENERIC_PHY if ROCKCHIP_DW_MIPI_DSI select GENERIC_PHY_MIPI_DPHY if ROCKCHIP_DW_MIPI_DSI select SND_SOC_HDMI_CODEC if ROCKCHIP_CDN_DP && SND_SOC @@ -82,6 +83,15 @@ config ROCKCHIP_DW_MIPI_DSI enable MIPI DSI on RK3288 or RK3399 based SoC, you should select this option. +config ROCKCHIP_DW_MIPI_DSI2 + bool "Rockchip specific extensions for Synopsys DW MIPI DSI2" + select GENERIC_PHY_MIPI_DPHY + help + This selects support for Rockchip SoC specific extensions + for the Synopsys DesignWare DSI2 driver. If you want to + enable MIPI DSI on RK3576 or RK3588 based SoC, you should + select this option. + config ROCKCHIP_INNO_HDMI bool "Rockchip specific extensions for Innosilicon HDMI" select DRM_DISPLAY_HDMI_HELPER diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile index 3eab662a5a1d..2b867cebbc12 100644 --- a/drivers/gpu/drm/rockchip/Makefile +++ b/drivers/gpu/drm/rockchip/Makefile @@ -13,6 +13,7 @@ rockchipdrm-$(CONFIG_ROCKCHIP_CDN_DP) += cdn-dp-core.o cdn-dp-reg.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_HDMI_QP) += dw_hdmi_qp-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi-rockchip.o +rockchipdrm-$(CONFIG_ROCKCHIP_DW_MIPI_DSI2) += dw-mipi-dsi2-rockchip.o rockchipdrm-$(CONFIG_ROCKCHIP_INNO_HDMI) += inno_hdmi.o rockchipdrm-$(CONFIG_ROCKCHIP_LVDS) += rockchip_lvds.o rockchipdrm-$(CONFIG_ROCKCHIP_RGB) += rockchip_rgb.o diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 546d13f19f9b..0844175c37c5 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -2,7 +2,7 @@ /* * Rockchip SoC DP (Display Port) interface driver. * - * Copyright (C) Fuzhou Rockchip Electronics Co., Ltd. + * Copyright (C) Rockchip Electronics Co., Ltd. 
* Author: Andy Yan <andy.yan@rock-chips.com> * Yakir Yang <ykk@rock-chips.com> * Jeff Chen <jeff.chen@rock-chips.com> @@ -386,7 +386,7 @@ static int rockchip_dp_probe(struct platform_device *pdev) return -ENODEV; ret = drm_of_find_panel_or_bridge(dev->of_node, 1, 0, &panel, NULL); - if (ret < 0) + if (ret < 0 && ret != -ENODEV) return ret; dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL); diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c index ff9d95e2c4d4..b76337f690ec 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: Chris Zhong <zyw@rock-chips.com> */ @@ -947,9 +947,6 @@ static void cdn_dp_pd_event_work(struct work_struct *work) { struct cdn_dp_device *dp = container_of(work, struct cdn_dp_device, event_work); - struct drm_connector *connector = &dp->connector; - enum drm_connector_status old_status; - int ret; mutex_lock(&dp->lock); @@ -1009,11 +1006,7 @@ static void cdn_dp_pd_event_work(struct work_struct *work) out: mutex_unlock(&dp->lock); - - old_status = connector->status; - connector->status = connector->funcs->detect(connector, false); - if (old_status != connector->status) - drm_kms_helper_hotplug_event(dp->drm_dev); + drm_connector_helper_hpd_irq_event(&dp->connector); } static int cdn_dp_pd_event(struct notifier_block *nb, diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.h b/drivers/gpu/drm/rockchip/cdn-dp-core.h index 8e6e95d269da..17498f576ce7 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-core.h +++ b/drivers/gpu/drm/rockchip/cdn-dp-core.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2016 Chris Zhong <zyw@rock-chips.com> - * Copyright (C) 2016 ROCKCHIP, Inc. + * Copyright (C) Rockchip Electronics Co., Ltd. */ #ifndef _CDN_DP_CORE_H diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.c b/drivers/gpu/drm/rockchip/cdn-dp-reg.c index 33fb4d05c506..924fb1d3ece2 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-reg.c +++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: Chris Zhong <zyw@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.h b/drivers/gpu/drm/rockchip/cdn-dp-reg.h index c7780ae3272a..13ed8cbdbafa 100644 --- a/drivers/gpu/drm/rockchip/cdn-dp-reg.h +++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: Chris Zhong <zyw@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index 1b64b6e39cc8..3398160ad75e 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0+ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. 
* Author: * Chris Zhong <zyw@rock-chips.com> * Nickey Yang <nickey.yang@rock-chips.com> diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c new file mode 100644 index 000000000000..cdd490778756 --- /dev/null +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi2-rockchip.c @@ -0,0 +1,487 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2024 Rockchip Electronics Co., Ltd. + * Author: + * Guochun Huang <hero.huang@rock-chips.com> + * Heiko Stuebner <heiko.stuebner@cherry.de> + */ + +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/media-bus-format.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/pm_runtime.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <linux/mfd/syscon.h> +#include <linux/phy/phy.h> + +#include <drm/bridge/dw_mipi_dsi2.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_of.h> +#include <drm/drm_simple_kms_helper.h> + +#include <uapi/linux/videodev2.h> + +#include "rockchip_drm_drv.h" + +#define PSEC_PER_SEC 1000000000000LL + +struct dsigrf_reg { + u16 offset; + u16 lsb; + u16 msb; +}; + +enum grf_reg_fields { + TXREQCLKHS_EN, + GATING_EN, + IPI_SHUTDN, + IPI_COLORM, + IPI_COLOR_DEPTH, + IPI_FORMAT, + MAX_FIELDS, +}; + +#define IPI_DEPTH_5_6_5_BITS 0x02 +#define IPI_DEPTH_6_BITS 0x03 +#define IPI_DEPTH_8_BITS 0x05 +#define IPI_DEPTH_10_BITS 0x06 + +struct rockchip_dw_dsi2_chip_data { + u32 reg; + const struct dsigrf_reg *grf_regs; + unsigned long long max_bit_rate_per_lane; +}; + +struct dw_mipi_dsi2_rockchip { + struct device *dev; + struct rockchip_encoder encoder; + struct regmap *regmap; + + unsigned int lane_mbps; /* per lane */ + u32 format; + + struct regmap *grf_regmap; + struct phy *phy; + union phy_configure_opts phy_opts; + + struct dw_mipi_dsi2 *dmd; + struct dw_mipi_dsi2_plat_data pdata; + const struct rockchip_dw_dsi2_chip_data *cdata; +}; + +static inline struct dw_mipi_dsi2_rockchip *to_dsi2(struct drm_encoder *encoder) +{ + struct rockchip_encoder *rkencoder = to_rockchip_encoder(encoder); + + return container_of(rkencoder, struct dw_mipi_dsi2_rockchip, encoder); +} + +static void grf_field_write(struct dw_mipi_dsi2_rockchip *dsi2, enum grf_reg_fields index, + unsigned int val) +{ + const struct dsigrf_reg *field = &dsi2->cdata->grf_regs[index]; + + if (!field) + return; + + regmap_write(dsi2->grf_regmap, field->offset, + (val << field->lsb) | (GENMASK(field->msb, field->lsb) << 16)); +} + +static int dw_mipi_dsi2_phy_init(void *priv_data) +{ + return 0; +} + +static void dw_mipi_dsi2_phy_power_on(void *priv_data) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = priv_data; + int ret; + + ret = phy_set_mode(dsi2->phy, PHY_MODE_MIPI_DPHY); + if (ret) { + dev_err(dsi2->dev, "Failed to set phy mode: %d\n", ret); + return; + } + + phy_configure(dsi2->phy, &dsi2->phy_opts); + phy_power_on(dsi2->phy); +} + +static void dw_mipi_dsi2_phy_power_off(void *priv_data) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = priv_data; + + phy_power_off(dsi2->phy); +} + +static int +dw_mipi_dsi2_get_lane_mbps(void *priv_data, const struct drm_display_mode *mode, + unsigned long mode_flags, u32 lanes, u32 format, + unsigned int *lane_mbps) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = priv_data; + u64 max_lane_rate, target_phyclk; + unsigned int lane_rate_kbps; + int bpp; + + max_lane_rate = dsi2->cdata->max_bit_rate_per_lane; + + dsi2->format = format; + bpp = 
mipi_dsi_pixel_format_to_bpp(format); + if (bpp < 0) { + dev_err(dsi2->dev, "failed to get bpp for pixel format %d\n", format); + return bpp; + } + + lane_rate_kbps = mode->clock * bpp / lanes; + + /* + * Set BW a little larger only in video burst mode in + * consideration of the protocol overhead and HS mode + * switching to BLLP mode, take 1 / 0.9, since Mbps must + * big than bandwidth of RGB + */ + if (mode_flags & MIPI_DSI_MODE_VIDEO_BURST) + lane_rate_kbps = (lane_rate_kbps * 10) / 9; + + if (lane_rate_kbps > max_lane_rate) { + dev_err(dsi2->dev, "DPHY clock frequency is out of range\n"); + return -ERANGE; + } + + dsi2->lane_mbps = lane_rate_kbps / 1000; + *lane_mbps = dsi2->lane_mbps; + + if (dsi2->phy) { + target_phyclk = DIV_ROUND_CLOSEST_ULL(lane_rate_kbps * lanes * 1000, bpp); + phy_mipi_dphy_get_default_config(target_phyclk, bpp, lanes, + &dsi2->phy_opts.mipi_dphy); + } + + return 0; +} + +static void dw_mipi_dsi2_phy_get_iface(void *priv_data, struct dw_mipi_dsi2_phy_iface *iface) +{ + /* PPI width is fixed to 16 bits in DCPHY */ + iface->ppi_width = 16; + iface->phy_type = DW_MIPI_DSI2_DPHY; +} + +static int +dw_mipi_dsi2_phy_get_timing(void *priv_data, unsigned int lane_mbps, + struct dw_mipi_dsi2_phy_timing *timing) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = priv_data; + struct phy_configure_opts_mipi_dphy *cfg = &dsi2->phy_opts.mipi_dphy; + unsigned long long tmp, ui; + unsigned long long hstx_clk; + + hstx_clk = DIV_ROUND_CLOSEST_ULL(dsi2->lane_mbps * USEC_PER_SEC, 16); + + ui = ALIGN(PSEC_PER_SEC, hstx_clk); + do_div(ui, hstx_clk); + + /* PHY_LP2HS_TIME = (TLPX + THS-PREPARE + THS-ZERO) / Tphy_hstx_clk */ + tmp = cfg->lpx + cfg->hs_prepare + cfg->hs_zero; + tmp = DIV_ROUND_CLOSEST_ULL(tmp << 16, ui); + timing->data_lp2hs = tmp; + + /* PHY_HS2LP_TIME = (THS-TRAIL + THS-EXIT) / Tphy_hstx_clk */ + tmp = cfg->hs_trail + cfg->hs_exit; + tmp = DIV_ROUND_CLOSEST_ULL(tmp << 16, ui); + timing->data_hs2lp = tmp; + + return 0; +} + +static const struct dw_mipi_dsi2_phy_ops dw_mipi_dsi2_rockchip_phy_ops = { + .init = dw_mipi_dsi2_phy_init, + .power_on = dw_mipi_dsi2_phy_power_on, + .power_off = dw_mipi_dsi2_phy_power_off, + .get_interface = dw_mipi_dsi2_phy_get_iface, + .get_lane_mbps = dw_mipi_dsi2_get_lane_mbps, + .get_timing = dw_mipi_dsi2_phy_get_timing, +}; + +static void dw_mipi_dsi2_encoder_atomic_enable(struct drm_encoder *encoder, + struct drm_atomic_state *state) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = to_dsi2(encoder); + u32 color_depth; + + switch (dsi2->format) { + case MIPI_DSI_FMT_RGB666: + case MIPI_DSI_FMT_RGB666_PACKED: + color_depth = IPI_DEPTH_6_BITS; + break; + case MIPI_DSI_FMT_RGB565: + color_depth = IPI_DEPTH_5_6_5_BITS; + break; + case MIPI_DSI_FMT_RGB888: + color_depth = IPI_DEPTH_8_BITS; + break; + default: + /* Should've been caught by atomic_check */ + WARN_ON(1); + return; + } + + grf_field_write(dsi2, IPI_COLOR_DEPTH, color_depth); +} + +static int +dw_mipi_dsi2_encoder_atomic_check(struct drm_encoder *encoder, + struct drm_crtc_state *crtc_state, + struct drm_connector_state *conn_state) +{ + struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state); + struct dw_mipi_dsi2_rockchip *dsi2 = to_dsi2(encoder); + struct drm_connector *connector = conn_state->connector; + struct drm_display_info *info = &connector->display_info; + + switch (dsi2->format) { + case MIPI_DSI_FMT_RGB666: + case MIPI_DSI_FMT_RGB666_PACKED: + s->output_mode = ROCKCHIP_OUT_MODE_P666; + break; + case MIPI_DSI_FMT_RGB565: + s->output_mode = ROCKCHIP_OUT_MODE_P565; + break; 
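To put concrete numbers on the bandwidth check in dw_mipi_dsi2_get_lane_mbps() above: a 1920x1080@60 mode (mode->clock = 148500 kHz) in RGB888 over four lanes gives 148500 * 24 / 4 = 891000 kbps, i.e. 891 Mbps per lane, and the burst-mode padding raises that to 891000 * 10 / 9 = 990000 kbps, still comfortably below the 4500000 kbps per-lane limit in rk3588_chip_data.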
+ case MIPI_DSI_FMT_RGB888: + s->output_mode = ROCKCHIP_OUT_MODE_P888; + break; + default: + WARN_ON(1); + return -EINVAL; + } + + if (info->num_bus_formats) + s->bus_format = info->bus_formats[0]; + else + s->bus_format = MEDIA_BUS_FMT_RGB888_1X24; + + s->output_type = DRM_MODE_CONNECTOR_DSI; + s->bus_flags = info->bus_flags; + s->color_space = V4L2_COLORSPACE_DEFAULT; + + return 0; +} + +static const struct drm_encoder_helper_funcs +dw_mipi_dsi2_encoder_helper_funcs = { + .atomic_enable = dw_mipi_dsi2_encoder_atomic_enable, + .atomic_check = dw_mipi_dsi2_encoder_atomic_check, +}; + +static int rockchip_dsi2_drm_create_encoder(struct dw_mipi_dsi2_rockchip *dsi2, + struct drm_device *drm_dev) +{ + struct drm_encoder *encoder = &dsi2->encoder.encoder; + int ret; + + encoder->possible_crtcs = drm_of_find_possible_crtcs(drm_dev, + dsi2->dev->of_node); + + ret = drm_simple_encoder_init(drm_dev, encoder, DRM_MODE_ENCODER_DSI); + if (ret) { + dev_err(dsi2->dev, "Failed to initialize encoder with drm\n"); + return ret; + } + + drm_encoder_helper_add(encoder, &dw_mipi_dsi2_encoder_helper_funcs); + + return 0; +} + +static int dw_mipi_dsi2_rockchip_bind(struct device *dev, struct device *master, + void *data) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + int ret; + + ret = rockchip_dsi2_drm_create_encoder(dsi2, drm_dev); + if (ret) + return dev_err_probe(dev, ret, "Failed to create drm encoder\n"); + + rockchip_drm_encoder_set_crtc_endpoint_id(&dsi2->encoder, + dev->of_node, 0, 0); + + ret = dw_mipi_dsi2_bind(dsi2->dmd, &dsi2->encoder.encoder); + if (ret) + return dev_err_probe(dev, ret, "Failed to bind\n"); + + return 0; +} + +static void dw_mipi_dsi2_rockchip_unbind(struct device *dev, struct device *master, + void *data) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = dev_get_drvdata(dev); + + dw_mipi_dsi2_unbind(dsi2->dmd); +} + +static const struct component_ops dw_mipi_dsi2_rockchip_ops = { + .bind = dw_mipi_dsi2_rockchip_bind, + .unbind = dw_mipi_dsi2_rockchip_unbind, +}; + +static int dw_mipi_dsi2_rockchip_host_attach(void *priv_data, + struct mipi_dsi_device *device) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = priv_data; + int ret; + + ret = component_add(dsi2->dev, &dw_mipi_dsi2_rockchip_ops); + if (ret) + return dev_err_probe(dsi2->dev, ret, "Failed to register component\n"); + + return 0; +} + +static int dw_mipi_dsi2_rockchip_host_detach(void *priv_data, + struct mipi_dsi_device *device) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = priv_data; + + component_del(dsi2->dev, &dw_mipi_dsi2_rockchip_ops); + + return 0; +} + +static const struct dw_mipi_dsi2_host_ops dw_mipi_dsi2_rockchip_host_ops = { + .attach = dw_mipi_dsi2_rockchip_host_attach, + .detach = dw_mipi_dsi2_rockchip_host_detach, +}; + +static const struct regmap_config dw_mipi_dsi2_rockchip_regmap_config = { + .name = "dsi2-host", + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .fast_io = true, +}; + +static int dw_mipi_dsi2_rockchip_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *np = dev->of_node; + const struct rockchip_dw_dsi2_chip_data *cdata = + of_device_get_match_data(dev); + struct dw_mipi_dsi2_rockchip *dsi2; + struct resource *res; + void __iomem *base; + int i; + + dsi2 = devm_kzalloc(dev, sizeof(*dsi2), GFP_KERNEL); + if (!dsi2) + return -ENOMEM; + + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); + if (IS_ERR(base)) + return dev_err_probe(dev, PTR_ERR(base), "Unable to get dsi registers\n"); + + 
dsi2->regmap = devm_regmap_init_mmio(dev, base, &dw_mipi_dsi2_rockchip_regmap_config); + if (IS_ERR(dsi2->regmap)) + return dev_err_probe(dev, PTR_ERR(dsi2->regmap), "failed to init register map\n"); + + i = 0; + while (cdata[i].reg) { + if (cdata[i].reg == res->start) { + dsi2->cdata = &cdata[i]; + break; + } + + i++; + } + + if (!dsi2->cdata) + return dev_err_probe(dev, -EINVAL, "No dsi-config for %s node\n", np->name); + + dsi2->grf_regmap = syscon_regmap_lookup_by_phandle(dev->of_node, "rockchip,grf"); + if (IS_ERR(dsi2->grf_regmap)) + return dev_err_probe(dsi2->dev, PTR_ERR(dsi2->grf_regmap), "Unable to get grf\n"); + + dsi2->phy = devm_phy_optional_get(dev, "dcphy"); + if (IS_ERR(dsi2->phy)) + return dev_err_probe(dev, PTR_ERR(dsi2->phy), "failed to get mipi phy\n"); + + dsi2->dev = dev; + dsi2->pdata.regmap = dsi2->regmap; + dsi2->pdata.max_data_lanes = 4; + dsi2->pdata.phy_ops = &dw_mipi_dsi2_rockchip_phy_ops; + dsi2->pdata.host_ops = &dw_mipi_dsi2_rockchip_host_ops; + dsi2->pdata.priv_data = dsi2; + platform_set_drvdata(pdev, dsi2); + + dsi2->dmd = dw_mipi_dsi2_probe(pdev, &dsi2->pdata); + if (IS_ERR(dsi2->dmd)) + return dev_err_probe(dev, PTR_ERR(dsi2->dmd), "Failed to probe dw_mipi_dsi2\n"); + + return 0; +} + +static void dw_mipi_dsi2_rockchip_remove(struct platform_device *pdev) +{ + struct dw_mipi_dsi2_rockchip *dsi2 = platform_get_drvdata(pdev); + + dw_mipi_dsi2_remove(dsi2->dmd); +} + +static const struct dsigrf_reg rk3588_dsi0_grf_reg_fields[MAX_FIELDS] = { + [TXREQCLKHS_EN] = { 0x0000, 11, 11 }, + [GATING_EN] = { 0x0000, 10, 10 }, + [IPI_SHUTDN] = { 0x0000, 9, 9 }, + [IPI_COLORM] = { 0x0000, 8, 8 }, + [IPI_COLOR_DEPTH] = { 0x0000, 4, 7 }, + [IPI_FORMAT] = { 0x0000, 0, 3 }, +}; + +static const struct dsigrf_reg rk3588_dsi1_grf_reg_fields[MAX_FIELDS] = { + [TXREQCLKHS_EN] = { 0x0004, 11, 11 }, + [GATING_EN] = { 0x0004, 10, 10 }, + [IPI_SHUTDN] = { 0x0004, 9, 9 }, + [IPI_COLORM] = { 0x0004, 8, 8 }, + [IPI_COLOR_DEPTH] = { 0x0004, 4, 7 }, + [IPI_FORMAT] = { 0x0004, 0, 3 }, +}; + +static const struct rockchip_dw_dsi2_chip_data rk3588_chip_data[] = { + { + .reg = 0xfde20000, + .grf_regs = rk3588_dsi0_grf_reg_fields, + .max_bit_rate_per_lane = 4500000ULL, + }, + { + .reg = 0xfde30000, + .grf_regs = rk3588_dsi1_grf_reg_fields, + .max_bit_rate_per_lane = 4500000ULL, + } +}; + +static const struct of_device_id dw_mipi_dsi2_rockchip_dt_ids[] = { + { + .compatible = "rockchip,rk3588-mipi-dsi2", + .data = &rk3588_chip_data, + }, + {} +}; +MODULE_DEVICE_TABLE(of, dw_mipi_dsi2_rockchip_dt_ids); + +struct platform_driver dw_mipi_dsi2_rockchip_driver = { + .probe = dw_mipi_dsi2_rockchip_probe, + .remove = dw_mipi_dsi2_rockchip_remove, + .driver = { + .of_match_table = dw_mipi_dsi2_rockchip_dt_ids, + .name = "dw-mipi-dsi2", + }, +}; diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 42bda4ffbbbd..e7a6669c46b0 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * Copyright (c) 2014, Fuzhou Rockchip Electronics Co., Ltd + * Copyright (c) 2014, Rockchip Electronics Co., Ltd. 
*/ #include <linux/clk.h> diff --git a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c index c8b362cc2b95..e498767a0a66 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi_qp-rockchip.c @@ -28,20 +28,26 @@ #define RK3588_GRF_SOC_CON2 0x0308 #define RK3588_HDMI0_HPD_INT_MSK BIT(13) #define RK3588_HDMI0_HPD_INT_CLR BIT(12) +#define RK3588_HDMI1_HPD_INT_MSK BIT(15) +#define RK3588_HDMI1_HPD_INT_CLR BIT(14) #define RK3588_GRF_SOC_CON7 0x031c #define RK3588_SET_HPD_PATH_MASK GENMASK(13, 12) #define RK3588_GRF_SOC_STATUS1 0x0384 #define RK3588_HDMI0_LEVEL_INT BIT(16) +#define RK3588_HDMI1_LEVEL_INT BIT(24) #define RK3588_GRF_VO1_CON3 0x000c +#define RK3588_GRF_VO1_CON6 0x0018 #define RK3588_SCLIN_MASK BIT(9) #define RK3588_SDAIN_MASK BIT(10) #define RK3588_MODE_MASK BIT(11) #define RK3588_I2S_SEL_MASK BIT(13) #define RK3588_GRF_VO1_CON9 0x0024 #define RK3588_HDMI0_GRANT_SEL BIT(10) +#define RK3588_HDMI1_GRANT_SEL BIT(12) #define HIWORD_UPDATE(val, mask) ((val) | (mask) << 16) #define HOTPLUG_DEBOUNCE_MS 150 +#define MAX_HDMI_PORT_NUM 2 struct rockchip_hdmi_qp { struct device *dev; @@ -53,6 +59,7 @@ struct rockchip_hdmi_qp { struct phy *phy; struct gpio_desc *enable_gpio; struct delayed_work hpd_work; + int port_id; }; static struct rockchip_hdmi_qp *to_rockchip_hdmi_qp(struct drm_encoder *encoder) @@ -127,20 +134,24 @@ dw_hdmi_qp_rk3588_read_hpd(struct dw_hdmi_qp *dw_hdmi, void *data) u32 val; regmap_read(hdmi->regmap, RK3588_GRF_SOC_STATUS1, &val); + val &= hdmi->port_id ? RK3588_HDMI1_LEVEL_INT : RK3588_HDMI0_LEVEL_INT; - return val & RK3588_HDMI0_LEVEL_INT ? - connector_status_connected : connector_status_disconnected; + return val ? connector_status_connected : connector_status_disconnected; } static void dw_hdmi_qp_rk3588_setup_hpd(struct dw_hdmi_qp *dw_hdmi, void *data) { struct rockchip_hdmi_qp *hdmi = (struct rockchip_hdmi_qp *)data; + u32 val; + + if (hdmi->port_id) + val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR, + RK3588_HDMI1_HPD_INT_CLR | RK3588_HDMI1_HPD_INT_MSK); + else + val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR, + RK3588_HDMI0_HPD_INT_CLR | RK3588_HDMI0_HPD_INT_MSK); - regmap_write(hdmi->regmap, - RK3588_GRF_SOC_CON2, - HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR, - RK3588_HDMI0_HPD_INT_CLR | - RK3588_HDMI0_HPD_INT_MSK)); + regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); } static const struct dw_hdmi_qp_phy_ops rk3588_hdmi_phy_ops = { @@ -173,8 +184,12 @@ static irqreturn_t dw_hdmi_qp_rk3588_hardirq(int irq, void *dev_id) regmap_read(hdmi->regmap, RK3588_GRF_SOC_STATUS1, &intr_stat); if (intr_stat) { - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, - RK3588_HDMI0_HPD_INT_MSK); + if (hdmi->port_id) + val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK, + RK3588_HDMI1_HPD_INT_MSK); + else + val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, + RK3588_HDMI0_HPD_INT_MSK); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); return IRQ_WAKE_THREAD; } @@ -191,22 +206,44 @@ static irqreturn_t dw_hdmi_qp_rk3588_irq(int irq, void *dev_id) if (!intr_stat) return IRQ_NONE; - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR, - RK3588_HDMI0_HPD_INT_CLR); + if (hdmi->port_id) + val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_CLR, + RK3588_HDMI1_HPD_INT_CLR); + else + val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_CLR, + RK3588_HDMI0_HPD_INT_CLR); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); mod_delayed_work(system_wq, &hdmi->hpd_work, msecs_to_jiffies(HOTPLUG_DEBOUNCE_MS)); - val |= HIWORD_UPDATE(0, 
RK3588_HDMI0_HPD_INT_MSK); + if (hdmi->port_id) + val |= HIWORD_UPDATE(0, RK3588_HDMI1_HPD_INT_MSK); + else + val |= HIWORD_UPDATE(0, RK3588_HDMI0_HPD_INT_MSK); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); return IRQ_HANDLED; } +struct rockchip_hdmi_qp_cfg { + unsigned int num_ports; + unsigned int port_ids[MAX_HDMI_PORT_NUM]; + const struct dw_hdmi_qp_phy_ops *phy_ops; +}; + +static const struct rockchip_hdmi_qp_cfg rk3588_hdmi_cfg = { + .num_ports = 2, + .port_ids = { + 0xfde80000, + 0xfdea0000, + }, + .phy_ops = &rk3588_hdmi_phy_ops, +}; + static const struct of_device_id dw_hdmi_qp_rockchip_dt_ids[] = { { .compatible = "rockchip,rk3588-dw-hdmi-qp", - .data = &rk3588_hdmi_phy_ops }, + .data = &rk3588_hdmi_cfg }, {}, }; MODULE_DEVICE_TABLE(of, dw_hdmi_qp_rockchip_dt_ids); @@ -214,17 +251,15 @@ MODULE_DEVICE_TABLE(of, dw_hdmi_qp_rockchip_dt_ids); static int dw_hdmi_qp_rockchip_bind(struct device *dev, struct device *master, void *data) { - static const char * const clk_names[] = { - "pclk", "earc", "aud", "hdp", "hclk_vo1", - "ref" /* keep "ref" last */ - }; struct platform_device *pdev = to_platform_device(dev); + const struct rockchip_hdmi_qp_cfg *cfg; struct dw_hdmi_qp_plat_data plat_data; struct drm_device *drm = data; struct drm_connector *connector; struct drm_encoder *encoder; struct rockchip_hdmi_qp *hdmi; - struct clk *clk; + struct resource *res; + struct clk_bulk_data *clks; int ret, irq, i; u32 val; @@ -235,12 +270,31 @@ static int dw_hdmi_qp_rockchip_bind(struct device *dev, struct device *master, if (!hdmi) return -ENOMEM; - plat_data.phy_ops = of_device_get_match_data(dev); - if (!plat_data.phy_ops) + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + cfg = of_device_get_match_data(dev); + if (!cfg) return -ENODEV; - plat_data.phy_data = hdmi; hdmi->dev = &pdev->dev; + hdmi->port_id = -ENODEV; + + /* Identify port ID by matching base IO address */ + for (i = 0; i < cfg->num_ports; i++) { + if (res->start == cfg->port_ids[i]) { + hdmi->port_id = i; + break; + } + } + if (hdmi->port_id < 0) { + drm_err(hdmi, "Failed to match HDMI port ID\n"); + return hdmi->port_id; + } + + plat_data.phy_ops = cfg->phy_ops; + plat_data.phy_data = hdmi; encoder = &hdmi->encoder.encoder; encoder->possible_crtcs = drm_of_find_possible_crtcs(drm, dev->of_node); @@ -270,18 +324,22 @@ static int dw_hdmi_qp_rockchip_bind(struct device *dev, struct device *master, return PTR_ERR(hdmi->vo_regmap); } - for (i = 0; i < ARRAY_SIZE(clk_names); i++) { - clk = devm_clk_get_enabled(hdmi->dev, clk_names[i]); + ret = devm_clk_bulk_get_all_enabled(hdmi->dev, &clks); + if (ret < 0) { + drm_err(hdmi, "Failed to get clocks: %d\n", ret); + return ret; + } - if (IS_ERR(clk)) { - ret = PTR_ERR(clk); - if (ret != -EPROBE_DEFER) - drm_err(hdmi, "Failed to get %s clock: %d\n", - clk_names[i], ret); - return ret; + for (i = 0; i < ret; i++) { + if (!strcmp(clks[i].id, "ref")) { + hdmi->ref_clk = clks[i].clk; + break; } } - hdmi->ref_clk = clk; + if (!hdmi->ref_clk) { + drm_err(hdmi, "Missing ref clock\n"); + return -EINVAL; + } hdmi->enable_gpio = devm_gpiod_get_optional(hdmi->dev, "enable", GPIOD_OUT_HIGH); @@ -303,17 +361,26 @@ static int dw_hdmi_qp_rockchip_bind(struct device *dev, struct device *master, HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) | HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) | HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK); - regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON3, val); + regmap_write(hdmi->vo_regmap, + hdmi->port_id ?
RK3588_GRF_VO1_CON6 : RK3588_GRF_VO1_CON3, + val); val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK, RK3588_SET_HPD_PATH_MASK); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val); - val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, - RK3588_HDMI0_GRANT_SEL); + if (hdmi->port_id) + val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL, + RK3588_HDMI1_GRANT_SEL); + else + val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, + RK3588_HDMI0_GRANT_SEL); regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON9, val); - val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, RK3588_HDMI0_HPD_INT_MSK); + if (hdmi->port_id) + val = HIWORD_UPDATE(RK3588_HDMI1_HPD_INT_MSK, RK3588_HDMI1_HPD_INT_MSK); + else + val = HIWORD_UPDATE(RK3588_HDMI0_HPD_INT_MSK, RK3588_HDMI0_HPD_INT_MSK); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON2, val); INIT_DELAYED_WORK(&hdmi->hpd_work, dw_hdmi_qp_rk3588_hpd_work); @@ -391,14 +458,20 @@ static int __maybe_unused dw_hdmi_qp_rockchip_resume(struct device *dev) HIWORD_UPDATE(RK3588_SDAIN_MASK, RK3588_SDAIN_MASK) | HIWORD_UPDATE(RK3588_MODE_MASK, RK3588_MODE_MASK) | HIWORD_UPDATE(RK3588_I2S_SEL_MASK, RK3588_I2S_SEL_MASK); - regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON3, val); + regmap_write(hdmi->vo_regmap, + hdmi->port_id ? RK3588_GRF_VO1_CON6 : RK3588_GRF_VO1_CON3, + val); val = HIWORD_UPDATE(RK3588_SET_HPD_PATH_MASK, RK3588_SET_HPD_PATH_MASK); regmap_write(hdmi->regmap, RK3588_GRF_SOC_CON7, val); - val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, - RK3588_HDMI0_GRANT_SEL); + if (hdmi->port_id) + val = HIWORD_UPDATE(RK3588_HDMI1_GRANT_SEL, + RK3588_HDMI1_GRANT_SEL); + else + val = HIWORD_UPDATE(RK3588_HDMI0_GRANT_SEL, + RK3588_HDMI0_GRANT_SEL); regmap_write(hdmi->vo_regmap, RK3588_GRF_VO1_CON9, val); dw_hdmi_qp_resume(dev, hdmi->hdmi); diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index b58e2a29294b..898d90155057 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Zheng Yang <zhengyang@rock-chips.com> * Yakir Yang <ykk@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.h b/drivers/gpu/drm/rockchip/inno_hdmi.h index a7edf3559e60..8b7ef3fac485 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.h +++ b/drivers/gpu/drm/rockchip/inno_hdmi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Zheng Yang <zhengyang@rock-chips.com> * Yakir Yang <ykk@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c index b0fc8ace2e41..403336397214 100644 --- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c +++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Zheng Yang <zhengyang@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.h b/drivers/gpu/drm/rockchip/rk3066_hdmi.h index 39a31c62a428..c3598ba7428c 100644 --- a/drivers/gpu/drm/rockchip/rk3066_hdmi.h +++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. 
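
[Illustration, not part of the patch] The HIWORD_UPDATE() writes in the dw_hdmi_qp changes above rely on the usual Rockchip GRF convention: the upper 16 bits of the written word act as a write-enable mask for the lower 16 bits, so a single regmap_write() updates only the selected bits without a read-modify-write. A minimal standalone sketch of that behaviour; grf_apply() is a model of the hardware, not driver code:

#include <stdint.h>
#include <stdio.h>

#define HIWORD_UPDATE(val, mask)	((val) | (mask) << 16)

/* Model of how a GRF register applies a hiword-masked write. */
static uint32_t grf_apply(uint32_t old, uint32_t write)
{
	uint32_t mask = write >> 16;

	return (old & ~mask) | (write & mask);
}

int main(void)
{
	uint32_t reg = 0x0000ffff;	/* current register content */
	uint32_t msk = 1u << 13;	/* e.g. RK3588_HDMI0_HPD_INT_MSK */

	/* Clear bit 13 only; all other bits are untouched. */
	reg = grf_apply(reg, HIWORD_UPDATE(0, msk));
	printf("after unmask: %#010x\n", (unsigned int)reg);	/* 0x0000dfff */

	/* Set bit 13 only. */
	reg = grf_apply(reg, HIWORD_UPDATE(msk, msk));
	printf("after mask:   %#010x\n", (unsigned int)reg);	/* 0x0000ffff */

	return 0;
}
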
* Zheng Yang <zhengyang@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c index 32d8394c4c49..439edc165ff6 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> * * based on exynos_drm_drv.c @@ -17,7 +17,7 @@ #include <linux/console.h> #include <linux/iommu.h> -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -39,7 +39,6 @@ #define DRIVER_NAME "rockchip" #define DRIVER_DESC "RockChip Soc DRM" -#define DRIVER_DATE "20140818" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -235,7 +234,6 @@ static const struct drm_driver rockchip_drm_driver = { .fops = &rockchip_drm_driver_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, }; @@ -488,8 +486,7 @@ static void rockchip_drm_platform_shutdown(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); - if (drm) - drm_atomic_helper_shutdown(drm); + drm_atomic_helper_shutdown(drm); } static const struct of_device_id rockchip_drm_dt_ids[] = { @@ -536,6 +533,8 @@ static int __init rockchip_drm_init(void) CONFIG_ROCKCHIP_DW_HDMI_QP); ADD_ROCKCHIP_SUB_DRIVER(dw_mipi_dsi_rockchip_driver, CONFIG_ROCKCHIP_DW_MIPI_DSI); + ADD_ROCKCHIP_SUB_DRIVER(dw_mipi_dsi2_rockchip_driver, + CONFIG_ROCKCHIP_DW_MIPI_DSI2); ADD_ROCKCHIP_SUB_DRIVER(inno_hdmi_driver, CONFIG_ROCKCHIP_INNO_HDMI); ADD_ROCKCHIP_SUB_DRIVER(rk3066_hdmi_driver, CONFIG_ROCKCHIP_RK3066_HDMI); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index 24b4ce5ceaf1..c183e82a42a5 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> * * based on exynos_drm_drv.h @@ -90,6 +90,7 @@ extern struct platform_driver cdn_dp_driver; extern struct platform_driver dw_hdmi_rockchip_pltfm_driver; extern struct platform_driver dw_hdmi_qp_rockchip_pltfm_driver; extern struct platform_driver dw_mipi_dsi_rockchip_driver; +extern struct platform_driver dw_mipi_dsi2_rockchip_driver; extern struct platform_driver inno_hdmi_driver; extern struct platform_driver rockchip_dp_driver; extern struct platform_driver rockchip_lvds_driver; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index cfe8b793d344..dcc1f07632c3 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. 
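
[Illustration, not part of the patch] The ADD_ROCKCHIP_SUB_DRIVER() calls added above collect the optional component drivers into a table before the main platform driver is registered. A simplified sketch of that pattern, assuming the stock platform_register_drivers() helper; the array size and the rockchip_register_sub_drivers() name are illustrative, not the driver's actual macro:

#include <linux/kconfig.h>
#include <linux/platform_device.h>

extern struct platform_driver dw_mipi_dsi2_rockchip_driver;
extern struct platform_driver inno_hdmi_driver;

static struct platform_driver *rockchip_sub_drivers[8];
static int num_rockchip_sub_drivers;

static int rockchip_register_sub_drivers(void)
{
	/* Only drivers whose Kconfig option is enabled are added. */
	if (IS_ENABLED(CONFIG_ROCKCHIP_DW_MIPI_DSI2))
		rockchip_sub_drivers[num_rockchip_sub_drivers++] =
			&dw_mipi_dsi2_rockchip_driver;
	if (IS_ENABLED(CONFIG_ROCKCHIP_INNO_HDMI))
		rockchip_sub_drivers[num_rockchip_sub_drivers++] =
			&inno_hdmi_driver;

	return platform_register_drivers(rockchip_sub_drivers,
					 num_rockchip_sub_drivers);
}
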
* Author:Mark Yao <mark.yao@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.h b/drivers/gpu/drm/rockchip/rockchip_drm_fb.h index bae4e079dfb1..5179026b12d6 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 93ed841f5dce..6330b883efc3 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.h b/drivers/gpu/drm/rockchip/rockchip_drm_gem.h index 72f59ac6d258..cdeae36b91a1 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 69900138295b..57747f1cff26 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h index 0cf512cc1614..f04c9731ae7b 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 9873172e3fd3..3ca6a688f5f5 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -24,16 +24,17 @@ #include <drm/drm_atomic_uapi.h> #include <drm/drm_blend.h> #include <drm/drm_crtc.h> +#include <linux/debugfs.h> #include <drm/drm_debugfs.h> #include <drm/drm_flip_work.h> #include <drm/drm_framebuffer.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> #include <uapi/linux/videodev2.h> #include <dt-bindings/soc/rockchip,vop2.h> -#include "rockchip_drm_drv.h" #include "rockchip_drm_gem.h" #include "rockchip_drm_vop2.h" #include "rockchip_rgb.h" @@ -186,6 +187,7 @@ struct vop2 { */ u32 registered_num_wins; + struct resource *res; void __iomem *regs; struct regmap *map; @@ -237,6 +239,37 @@ struct vop2 { #define vop2_output_if_is_dpi(x) ((x) == ROCKCHIP_VOP2_EP_RGB0) +/* + * bus-format types. 
+ */ +struct drm_bus_format_enum_list { + int type; + const char *name; +}; + +static const struct drm_bus_format_enum_list drm_bus_format_enum_list[] = { + { DRM_MODE_CONNECTOR_Unknown, "Unknown" }, + { MEDIA_BUS_FMT_RGB565_1X16, "RGB565_1X16" }, + { MEDIA_BUS_FMT_RGB666_1X18, "RGB666_1X18" }, + { MEDIA_BUS_FMT_RGB666_1X24_CPADHI, "RGB666_1X24_CPADHI" }, + { MEDIA_BUS_FMT_RGB666_1X7X3_SPWG, "RGB666_1X7X3_SPWG" }, + { MEDIA_BUS_FMT_YUV8_1X24, "YUV8_1X24" }, + { MEDIA_BUS_FMT_UYYVYY8_0_5X24, "UYYVYY8_0_5X24" }, + { MEDIA_BUS_FMT_YUV10_1X30, "YUV10_1X30" }, + { MEDIA_BUS_FMT_UYYVYY10_0_5X30, "UYYVYY10_0_5X30" }, + { MEDIA_BUS_FMT_RGB888_3X8, "RGB888_3X8" }, + { MEDIA_BUS_FMT_RGB888_1X24, "RGB888_1X24" }, + { MEDIA_BUS_FMT_RGB888_1X7X4_SPWG, "RGB888_1X7X4_SPWG" }, + { MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA, "RGB888_1X7X4_JEIDA" }, + { MEDIA_BUS_FMT_UYVY8_2X8, "UYVY8_2X8" }, + { MEDIA_BUS_FMT_YUYV8_1X16, "YUYV8_1X16" }, + { MEDIA_BUS_FMT_UYVY8_1X16, "UYVY8_1X16" }, + { MEDIA_BUS_FMT_RGB101010_1X30, "RGB101010_1X30" }, + { MEDIA_BUS_FMT_YUYV10_1X20, "YUYV10_1X20" }, +}; + +static DRM_ENUM_NAME_FN(drm_get_bus_format_name, drm_bus_format_enum_list) + static const struct regmap_config vop2_regmap_config; static struct vop2_video_port *to_vop2_video_port(struct drm_crtc *crtc) @@ -278,6 +311,15 @@ static u32 vop2_readl(struct vop2 *vop2, u32 offset) return val; } +static u32 vop2_vp_read(struct vop2_video_port *vp, u32 offset) +{ + u32 val; + + regmap_read(vp->vop2->map, vp->data->offset + offset, &val); + + return val; +} + static void vop2_win_write(const struct vop2_win *win, unsigned int reg, u32 v) { regmap_field_write(win->reg[reg], v); @@ -550,6 +592,25 @@ static bool rockchip_vop2_mod_supported(struct drm_plane *plane, u32 format, if (modifier == DRM_FORMAT_MOD_INVALID) return false; + if (vop2->data->soc_id == 3568 || vop2->data->soc_id == 3566) { + if (vop2_cluster_window(win)) { + if (modifier == DRM_FORMAT_MOD_LINEAR) { + drm_dbg_kms(vop2->drm, + "Cluster window only supports format with afbc\n"); + return false; + } + } + } + + if (format == DRM_FORMAT_XRGB2101010 || format == DRM_FORMAT_XBGR2101010) { + if (vop2->data->soc_id == 3588) { + if (!rockchip_afbc(plane, modifier)) { + drm_dbg_kms(vop2->drm, "Only support 32 bpp format with afbc\n"); + return false; + } + } + } + if (modifier == DRM_FORMAT_MOD_LINEAR) return true; @@ -998,6 +1059,67 @@ static void vop2_disable(struct vop2 *vop2) clk_disable_unprepare(vop2->hclk); } +static bool vop2_vp_dsp_lut_is_enabled(struct vop2_video_port *vp) +{ + u32 dsp_ctrl = vop2_vp_read(vp, RK3568_VP_DSP_CTRL); + + return dsp_ctrl & RK3568_VP_DSP_CTRL__DSP_LUT_EN; +} + +static void vop2_vp_dsp_lut_disable(struct vop2_video_port *vp) +{ + u32 dsp_ctrl = vop2_vp_read(vp, RK3568_VP_DSP_CTRL); + + dsp_ctrl &= ~RK3568_VP_DSP_CTRL__DSP_LUT_EN; + vop2_vp_write(vp, RK3568_VP_DSP_CTRL, dsp_ctrl); +} + +static bool vop2_vp_dsp_lut_poll_disabled(struct vop2_video_port *vp) +{ + u32 dsp_ctrl; + int ret = readx_poll_timeout(vop2_vp_dsp_lut_is_enabled, vp, dsp_ctrl, + !dsp_ctrl, 5, 30 * 1000); + if (ret) { + drm_err(vp->vop2->drm, "display LUT RAM enable timeout!\n"); + return false; + } + + return true; +} + +static void vop2_vp_dsp_lut_enable(struct vop2_video_port *vp) +{ + u32 dsp_ctrl = vop2_vp_read(vp, RK3568_VP_DSP_CTRL); + + dsp_ctrl |= RK3568_VP_DSP_CTRL__DSP_LUT_EN; + vop2_vp_write(vp, RK3568_VP_DSP_CTRL, dsp_ctrl); +} + +static void vop2_vp_dsp_lut_update_enable(struct vop2_video_port *vp) +{ + u32 dsp_ctrl = vop2_vp_read(vp, RK3568_VP_DSP_CTRL); + + 
dsp_ctrl |= RK3588_VP_DSP_CTRL__GAMMA_UPDATE_EN; + vop2_vp_write(vp, RK3568_VP_DSP_CTRL, dsp_ctrl); +} + +static inline bool vop2_supports_seamless_gamma_lut_update(struct vop2 *vop2) +{ + return (vop2->data->soc_id != 3566 && vop2->data->soc_id != 3568); +} + +static bool vop2_gamma_lut_in_use(struct vop2 *vop2, struct vop2_video_port *vp) +{ + const int nr_vps = vop2->data->nr_vps; + int gamma_en_vp_id; + + for (gamma_en_vp_id = 0; gamma_en_vp_id < nr_vps; gamma_en_vp_id++) + if (vop2_vp_dsp_lut_is_enabled(&vop2->vps[gamma_en_vp_id])) + break; + + return gamma_en_vp_id != nr_vps && gamma_en_vp_id != vp->id; +} + static void vop2_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { @@ -1271,8 +1393,9 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, dsp_w = drm_rect_width(dest); if (dest->x1 + dsp_w > adjusted_mode->hdisplay) { - drm_err(vop2->drm, "vp%d %s dest->x1[%d] + dsp_w[%d] exceed mode hdisplay[%d]\n", - vp->id, win->data->name, dest->x1, dsp_w, adjusted_mode->hdisplay); + drm_dbg_kms(vop2->drm, + "vp%d %s dest->x1[%d] + dsp_w[%d] exceed mode hdisplay[%d]\n", + vp->id, win->data->name, dest->x1, dsp_w, adjusted_mode->hdisplay); dsp_w = adjusted_mode->hdisplay - dest->x1; if (dsp_w < 4) dsp_w = 4; @@ -1282,8 +1405,9 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, dsp_h = drm_rect_height(dest); if (dest->y1 + dsp_h > adjusted_mode->vdisplay) { - drm_err(vop2->drm, "vp%d %s dest->y1[%d] + dsp_h[%d] exceed mode vdisplay[%d]\n", - vp->id, win->data->name, dest->y1, dsp_h, adjusted_mode->vdisplay); + drm_dbg_kms(vop2->drm, + "vp%d %s dest->y1[%d] + dsp_h[%d] exceed mode vdisplay[%d]\n", + vp->id, win->data->name, dest->y1, dsp_h, adjusted_mode->vdisplay); dsp_h = adjusted_mode->vdisplay - dest->y1; if (dsp_h < 4) dsp_h = 4; @@ -1296,15 +1420,15 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, */ if (!(win->data->feature & WIN_FEATURE_AFBDC)) { if (actual_w > dsp_w && (actual_w & 0xf) == 1) { - drm_err(vop2->drm, "vp%d %s act_w[%d] MODE 16 == 1\n", - vp->id, win->data->name, actual_w); + drm_dbg_kms(vop2->drm, "vp%d %s act_w[%d] MODE 16 == 1\n", + vp->id, win->data->name, actual_w); actual_w -= 1; } } if (afbc_en && actual_w % 4) { - drm_err(vop2->drm, "vp%d %s actual_w[%d] not 4 pixel aligned\n", - vp->id, win->data->name, actual_w); + drm_dbg_kms(vop2->drm, "vp%d %s actual_w[%d] not 4 pixel aligned\n", + vp->id, win->data->name, actual_w); actual_w = ALIGN_DOWN(actual_w, 4); } @@ -1320,6 +1444,12 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, &fb->format->format, afbc_en ? 
"AFBC" : "", &yrgb_mst); + if (vop2->data->soc_id > 3568) { + vop2_win_write(win, VOP2_WIN_AXI_BUS_ID, win->data->axi_bus_id); + vop2_win_write(win, VOP2_WIN_AXI_YRGB_R_ID, win->data->axi_yrgb_r_id); + vop2_win_write(win, VOP2_WIN_AXI_UV_R_ID, win->data->axi_uv_r_id); + } + if (vop2_cluster_window(win)) vop2_win_write(win, VOP2_WIN_AFBC_HALF_BLOCK_EN, half_block_en); @@ -1341,8 +1471,8 @@ static void vop2_plane_atomic_update(struct drm_plane *plane, */ stride = (fb->pitches[0] << 3) / bpp; if ((stride & 0x3f) && (xmirror || rotate_90 || rotate_270)) - drm_err(vop2->drm, "vp%d %s stride[%d] not 64 pixel aligned\n", - vp->id, win->data->name, stride); + drm_dbg_kms(vop2->drm, "vp%d %s stride[%d] not 64 pixel aligned\n", + vp->id, win->data->name, stride); uv_swap = vop2_afbc_uv_swap(fb->format->format); /* @@ -1482,6 +1612,77 @@ static bool vop2_crtc_mode_fixup(struct drm_crtc *crtc, return true; } +static void vop2_crtc_write_gamma_lut(struct vop2 *vop2, struct drm_crtc *crtc) +{ + const struct vop2_video_port *vp = to_vop2_video_port(crtc); + const struct vop2_video_port_data *vp_data = &vop2->data->vp[vp->id]; + struct drm_color_lut *lut = crtc->state->gamma_lut->data; + unsigned int i, bpc = ilog2(vp_data->gamma_lut_len); + u32 word; + + for (i = 0; i < crtc->gamma_size; i++) { + word = (drm_color_lut_extract(lut[i].blue, bpc) << (2 * bpc)) | + (drm_color_lut_extract(lut[i].green, bpc) << bpc) | + drm_color_lut_extract(lut[i].red, bpc); + + writel(word, vop2->lut_regs + i * 4); + } +} + +static void vop2_crtc_atomic_set_gamma_seamless(struct vop2 *vop2, + struct vop2_video_port *vp, + struct drm_crtc *crtc) +{ + vop2_writel(vop2, RK3568_LUT_PORT_SEL, + FIELD_PREP(RK3588_LUT_PORT_SEL__GAMMA_AHB_WRITE_SEL, vp->id)); + vop2_vp_dsp_lut_enable(vp); + vop2_crtc_write_gamma_lut(vop2, crtc); + vop2_vp_dsp_lut_update_enable(vp); +} + +static void vop2_crtc_atomic_set_gamma_rk356x(struct vop2 *vop2, + struct vop2_video_port *vp, + struct drm_crtc *crtc) +{ + vop2_vp_dsp_lut_disable(vp); + vop2_cfg_done(vp); + if (!vop2_vp_dsp_lut_poll_disabled(vp)) + return; + + vop2_writel(vop2, RK3568_LUT_PORT_SEL, vp->id); + vop2_crtc_write_gamma_lut(vop2, crtc); + vop2_vp_dsp_lut_enable(vp); +} + +static void vop2_crtc_atomic_try_set_gamma(struct vop2 *vop2, + struct vop2_video_port *vp, + struct drm_crtc *crtc, + struct drm_crtc_state *crtc_state) +{ + if (!vop2->lut_regs) + return; + + if (!crtc_state->gamma_lut) { + vop2_vp_dsp_lut_disable(vp); + return; + } + + if (vop2_supports_seamless_gamma_lut_update(vop2)) + vop2_crtc_atomic_set_gamma_seamless(vop2, vp, crtc); + else + vop2_crtc_atomic_set_gamma_rk356x(vop2, vp, crtc); +} + +static inline void vop2_crtc_atomic_try_set_gamma_locked(struct vop2 *vop2, + struct vop2_video_port *vp, + struct drm_crtc *crtc, + struct drm_crtc_state *crtc_state) +{ + vop2_lock(vop2); + vop2_crtc_atomic_try_set_gamma(vop2, vp, crtc, crtc_state); + vop2_unlock(vop2); +} + static void vop2_dither_setup(struct drm_crtc *crtc, u32 *dsp_ctrl) { struct rockchip_crtc_state *vcstate = to_rockchip_crtc_state(crtc->state); @@ -1721,9 +1922,9 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, else dclk_out_rate = v_pixclk >> 2; - dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); + dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000000); if (!dclk_rate) { - drm_err(vop2->drm, "DP dclk_out_rate out of range, dclk_out_rate: %ld KHZ\n", + drm_err(vop2->drm, "DP dclk_out_rate out of range, dclk_out_rate: %ld Hz\n", dclk_out_rate); return 0; } @@ -1738,9 
+1939,9 @@ static unsigned long rk3588_calc_cru_cfg(struct vop2_video_port *vp, int id, * dclk_rate = N * dclk_core_rate N = (1,2,4 ), * we get a little factor here */ - dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000); + dclk_rate = rk3588_calc_dclk(dclk_out_rate, 600000000); if (!dclk_rate) { - drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld KHZ\n", + drm_err(vop2->drm, "MIPI dclk out of range, dclk_out_rate: %ld Hz\n", dclk_out_rate); return 0; } @@ -2057,11 +2258,40 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, vop2_vp_write(vp, RK3568_VP_DSP_CTRL, dsp_ctrl); + vop2_crtc_atomic_try_set_gamma(vop2, vp, crtc, crtc_state); + drm_crtc_vblank_on(crtc); vop2_unlock(vop2); } +static int vop2_crtc_atomic_check_gamma(struct vop2_video_port *vp, + struct drm_crtc *crtc, + struct drm_atomic_state *state, + struct drm_crtc_state *crtc_state) +{ + struct vop2 *vop2 = vp->vop2; + unsigned int len; + + if (!vp->vop2->lut_regs || !crtc_state->color_mgmt_changed || + !crtc_state->gamma_lut) + return 0; + + len = drm_color_lut_size(crtc_state->gamma_lut); + if (len != crtc->gamma_size) { + drm_dbg(vop2->drm, "Invalid LUT size; got %d, expected %d\n", + len, crtc->gamma_size); + return -EINVAL; + } + + if (!vop2_supports_seamless_gamma_lut_update(vop2) && vop2_gamma_lut_in_use(vop2, vp)) { + drm_info(vop2->drm, "Gamma LUT can be enabled for only one CRTC at a time\n"); + return -EINVAL; + } + + return 0; +} + static int vop2_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { @@ -2069,6 +2299,11 @@ static int vop2_crtc_atomic_check(struct drm_crtc *crtc, struct drm_plane *plane; int nplanes = 0; struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + int ret; + + ret = vop2_crtc_atomic_check_gamma(vp, crtc, state, crtc_state); + if (ret) + return ret; drm_atomic_crtc_state_for_each_plane(plane, crtc_state) nplanes++; @@ -2159,7 +2394,6 @@ static int vop2_find_start_mixer_id_for_vp(struct vop2 *vop2, u8 port_id) static void vop2_setup_cluster_alpha(struct vop2 *vop2, struct vop2_win *main_win) { - u32 offset = (main_win->data->phys_id * 0x10); struct vop2_alpha_config alpha_config; struct vop2_alpha alpha; struct drm_plane_state *bottom_win_pstate; @@ -2167,6 +2401,7 @@ static void vop2_setup_cluster_alpha(struct vop2 *vop2, struct vop2_win *main_wi u16 src_glb_alpha_val, dst_glb_alpha_val; bool premulti_en = false; bool swap = false; + u32 offset = 0; /* At one win mode, win0 is dst/bottom win, and win1 is a all zero src/top win */ bottom_win_pstate = main_win->base.state; @@ -2185,6 +2420,22 @@ static void vop2_setup_cluster_alpha(struct vop2 *vop2, struct vop2_win *main_wi vop2_parse_alpha(&alpha_config, &alpha); alpha.src_color_ctrl.bits.src_dst_swap = swap; + + switch (main_win->data->phys_id) { + case ROCKCHIP_VOP2_CLUSTER0: + offset = 0x0; + break; + case ROCKCHIP_VOP2_CLUSTER1: + offset = 0x10; + break; + case ROCKCHIP_VOP2_CLUSTER2: + offset = 0x20; + break; + case ROCKCHIP_VOP2_CLUSTER3: + offset = 0x30; + break; + } + vop2_writel(vop2, RK3568_CLUSTER0_MIX_SRC_COLOR_CTRL + offset, alpha.src_color_ctrl.val); vop2_writel(vop2, RK3568_CLUSTER0_MIX_DST_COLOR_CTRL + offset, @@ -2232,6 +2483,12 @@ static void vop2_setup_alpha(struct vop2_video_port *vp) struct vop2_win *win = to_vop2_win(plane); int zpos = plane->state->normalized_zpos; + /* + * Need to configure alpha from second layer. 
+ */ + if (zpos == 0) + continue; + if (plane->state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI) premulti_en = 1; else @@ -2308,7 +2565,10 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) struct drm_plane *plane; u32 layer_sel = 0; u32 port_sel; - unsigned int nlayer, ofs; + u8 layer_id; + u8 old_layer_id; + u8 layer_sel_id; + unsigned int ofs; u32 ovl_ctrl; int i; struct vop2_video_port *vp0 = &vop2->vps[0]; @@ -2352,9 +2612,30 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) for (i = 0; i < vp->id; i++) ofs += vop2->vps[i].nlayers; - nlayer = 0; drm_atomic_crtc_for_each_plane(plane, &vp->crtc) { struct vop2_win *win = to_vop2_win(plane); + struct vop2_win *old_win; + + layer_id = (u8)(plane->state->normalized_zpos + ofs); + + /* + * Find the layer this win bind in old state. + */ + for (old_layer_id = 0; old_layer_id < vop2->data->win_size; old_layer_id++) { + layer_sel_id = (layer_sel >> (4 * old_layer_id)) & 0xf; + if (layer_sel_id == win->data->layer_sel_id) + break; + } + + /* + * Find the win bind to this layer in old state + */ + for (i = 0; i < vop2->data->win_size; i++) { + old_win = &vop2->win[i]; + layer_sel_id = (layer_sel >> (4 * layer_id)) & 0xf; + if (layer_sel_id == old_win->data->layer_sel_id) + break; + } switch (win->data->phys_id) { case ROCKCHIP_VOP2_CLUSTER0: @@ -2399,17 +2680,14 @@ static void vop2_setup_layer_mixer(struct vop2_video_port *vp) break; } - layer_sel &= ~RK3568_OVL_LAYER_SEL__LAYER(plane->state->normalized_zpos + ofs, - 0x7); - layer_sel |= RK3568_OVL_LAYER_SEL__LAYER(plane->state->normalized_zpos + ofs, - win->data->layer_sel_id); - nlayer++; - } - - /* configure unused layers to 0x5 (reserved) */ - for (; nlayer < vp->nlayers; nlayer++) { - layer_sel &= ~RK3568_OVL_LAYER_SEL__LAYER(nlayer + ofs, 0x7); - layer_sel |= RK3568_OVL_LAYER_SEL__LAYER(nlayer + ofs, 5); + layer_sel &= ~RK3568_OVL_LAYER_SEL__LAYER(layer_id, 0x7); + layer_sel |= RK3568_OVL_LAYER_SEL__LAYER(layer_id, win->data->layer_sel_id); + /* + * When we bind a window from layerM to layerN, we also need to move the old + * window on layerN to layerM to avoid one window selected by two or more layers. 
+ */ + layer_sel &= ~RK3568_OVL_LAYER_SEL__LAYER(old_layer_id, 0x7); + layer_sel |= RK3568_OVL_LAYER_SEL__LAYER(old_layer_id, old_win->data->layer_sel_id); } vop2_writel(vop2, RK3568_OVL_LAYER_SEL, layer_sel); @@ -2444,9 +2722,11 @@ static void vop2_setup_dly_for_windows(struct vop2 *vop2) sdly |= FIELD_PREP(RK3568_SMART_DLY_NUM__ESMART1, dly); break; case ROCKCHIP_VOP2_SMART0: + case ROCKCHIP_VOP2_ESMART2: sdly |= FIELD_PREP(RK3568_SMART_DLY_NUM__SMART0, dly); break; case ROCKCHIP_VOP2_SMART1: + case ROCKCHIP_VOP2_ESMART3: sdly |= FIELD_PREP(RK3568_SMART_DLY_NUM__SMART1, dly); break; } @@ -2487,7 +2767,13 @@ static void vop2_crtc_atomic_begin(struct drm_crtc *crtc, static void vop2_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { + struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); struct vop2_video_port *vp = to_vop2_video_port(crtc); + struct vop2 *vop2 = vp->vop2; + + /* In case of modeset, gamma lut update already happened in atomic enable */ + if (!drm_atomic_crtc_needs_modeset(crtc_state) && crtc_state->color_mgmt_changed) + vop2_crtc_atomic_try_set_gamma_locked(vop2, vp, crtc, crtc_state); vop2_post_config(crtc); @@ -2513,6 +2799,228 @@ static const struct drm_crtc_helper_funcs vop2_crtc_helper_funcs = { .atomic_disable = vop2_crtc_atomic_disable, }; +static void vop2_dump_connector_on_crtc(struct drm_crtc *crtc, struct seq_file *s) +{ + struct drm_connector_list_iter conn_iter; + struct drm_connector *connector; + + drm_connector_list_iter_begin(crtc->dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + if (crtc->state->connector_mask & drm_connector_mask(connector)) + seq_printf(s, " Connector: %s\n", connector->name); + } + drm_connector_list_iter_end(&conn_iter); +} + +static int vop2_plane_state_dump(struct seq_file *s, struct drm_plane *plane) +{ + struct vop2_win *win = to_vop2_win(plane); + struct drm_plane_state *pstate = plane->state; + struct drm_rect *src, *dst; + struct drm_framebuffer *fb; + struct drm_gem_object *obj; + struct rockchip_gem_object *rk_obj; + bool xmirror; + bool ymirror; + bool rotate_270; + bool rotate_90; + dma_addr_t fb_addr; + int i; + + seq_printf(s, " %s: %s\n", win->data->name, !pstate ? + "DISABLED" : pstate->crtc ? "ACTIVE" : "DISABLED"); + + if (!pstate || !pstate->fb) + return 0; + + fb = pstate->fb; + src = &pstate->src; + dst = &pstate->dst; + xmirror = pstate->rotation & DRM_MODE_REFLECT_X ? true : false; + ymirror = pstate->rotation & DRM_MODE_REFLECT_Y ? true : false; + rotate_270 = pstate->rotation & DRM_MODE_ROTATE_270; + rotate_90 = pstate->rotation & DRM_MODE_ROTATE_90; + + seq_printf(s, "\twin_id: %d\n", win->win_id); + + seq_printf(s, "\tformat: %p4cc%s glb_alpha[0x%x]\n", + &fb->format->format, + drm_is_afbc(fb->modifier) ? 
"[AFBC]" : "", + pstate->alpha >> 8); + seq_printf(s, "\trotate: xmirror: %d ymirror: %d rotate_90: %d rotate_270: %d\n", + xmirror, ymirror, rotate_90, rotate_270); + seq_printf(s, "\tzpos: %d\n", pstate->normalized_zpos); + seq_printf(s, "\tsrc: pos[%d, %d] rect[%d x %d]\n", src->x1 >> 16, + src->y1 >> 16, drm_rect_width(src) >> 16, + drm_rect_height(src) >> 16); + seq_printf(s, "\tdst: pos[%d, %d] rect[%d x %d]\n", dst->x1, dst->y1, + drm_rect_width(dst), drm_rect_height(dst)); + + for (i = 0; i < fb->format->num_planes; i++) { + obj = fb->obj[i]; + rk_obj = to_rockchip_obj(obj); + fb_addr = rk_obj->dma_addr + fb->offsets[i]; + + seq_printf(s, "\tbuf[%d]: addr: %pad pitch: %d offset: %d\n", + i, &fb_addr, fb->pitches[i], fb->offsets[i]); + } + + return 0; +} + +static int vop2_crtc_state_dump(struct drm_crtc *crtc, struct seq_file *s) +{ + struct vop2_video_port *vp = to_vop2_video_port(crtc); + struct drm_crtc_state *cstate = crtc->state; + struct rockchip_crtc_state *vcstate; + struct drm_display_mode *mode; + struct drm_plane *plane; + bool interlaced; + + seq_printf(s, "Video Port%d: %s\n", vp->id, !cstate ? + "DISABLED" : cstate->active ? "ACTIVE" : "DISABLED"); + + if (!cstate || !cstate->active) + return 0; + + mode = &crtc->state->adjusted_mode; + vcstate = to_rockchip_crtc_state(cstate); + interlaced = !!(mode->flags & DRM_MODE_FLAG_INTERLACE); + + vop2_dump_connector_on_crtc(crtc, s); + seq_printf(s, "\tbus_format[%x]: %s\n", vcstate->bus_format, + drm_get_bus_format_name(vcstate->bus_format)); + seq_printf(s, "\toutput_mode[%x]", vcstate->output_mode); + seq_printf(s, " color_space[%d]\n", vcstate->color_space); + seq_printf(s, " Display mode: %dx%d%s%d\n", + mode->hdisplay, mode->vdisplay, interlaced ? "i" : "p", + drm_mode_vrefresh(mode)); + seq_printf(s, "\tclk[%d] real_clk[%d] type[%x] flag[%x]\n", + mode->clock, mode->crtc_clock, mode->type, mode->flags); + seq_printf(s, "\tH: %d %d %d %d\n", mode->hdisplay, mode->hsync_start, + mode->hsync_end, mode->htotal); + seq_printf(s, "\tV: %d %d %d %d\n", mode->vdisplay, mode->vsync_start, + mode->vsync_end, mode->vtotal); + + drm_atomic_crtc_for_each_plane(plane, crtc) { + vop2_plane_state_dump(s, plane); + } + + return 0; +} + +static int vop2_summary_show(struct seq_file *s, void *data) +{ + struct drm_info_node *node = s->private; + struct drm_minor *minor = node->minor; + struct drm_device *drm_dev = minor->dev; + struct drm_crtc *crtc; + + drm_modeset_lock_all(drm_dev); + drm_for_each_crtc(crtc, drm_dev) { + vop2_crtc_state_dump(crtc, s); + } + drm_modeset_unlock_all(drm_dev); + + return 0; +} + +static void vop2_regs_print(struct vop2 *vop2, struct seq_file *s, + const struct vop2_regs_dump *dump, bool active_only) +{ + resource_size_t start; + u32 val; + int i; + + if (dump->en_mask && active_only) { + val = vop2_readl(vop2, dump->base + dump->en_reg); + if ((val & dump->en_mask) != dump->en_val) + return; + } + + seq_printf(s, "\n%s:\n", dump->name); + + start = vop2->res->start + dump->base; + for (i = 0; i < dump->size >> 2; i += 4) { + seq_printf(s, "%08x: %08x %08x %08x %08x\n", (u32)start + i * 4, + vop2_readl(vop2, dump->base + (4 * i)), + vop2_readl(vop2, dump->base + (4 * (i + 1))), + vop2_readl(vop2, dump->base + (4 * (i + 2))), + vop2_readl(vop2, dump->base + (4 * (i + 3)))); + } +} + +static void __vop2_regs_dump(struct seq_file *s, bool active_only) +{ + struct drm_info_node *node = s->private; + struct vop2 *vop2 = node->info_ent->data; + struct drm_minor *minor = node->minor; + struct drm_device *drm_dev = 
minor->dev; + const struct vop2_regs_dump *dump; + unsigned int i; + + drm_modeset_lock_all(drm_dev); + + regcache_drop_region(vop2->map, 0, vop2_regmap_config.max_register); + + if (vop2->enable_count) { + for (i = 0; i < vop2->data->regs_dump_size; i++) { + dump = &vop2->data->regs_dump[i]; + vop2_regs_print(vop2, s, dump, active_only); + } + } else { + seq_puts(s, "VOP disabled\n"); + } + drm_modeset_unlock_all(drm_dev); +} + +static int vop2_regs_show(struct seq_file *s, void *arg) +{ + __vop2_regs_dump(s, false); + + return 0; +} + +static int vop2_active_regs_show(struct seq_file *s, void *data) +{ + __vop2_regs_dump(s, true); + + return 0; +} + +static struct drm_info_list vop2_debugfs_list[] = { + { "summary", vop2_summary_show, 0, NULL }, + { "active_regs", vop2_active_regs_show, 0, NULL }, + { "regs", vop2_regs_show, 0, NULL }, +}; + +static void vop2_debugfs_init(struct vop2 *vop2, struct drm_minor *minor) +{ + struct dentry *root; + unsigned int i; + + root = debugfs_create_dir("vop2", minor->debugfs_root); + if (!IS_ERR(root)) { + for (i = 0; i < ARRAY_SIZE(vop2_debugfs_list); i++) + vop2_debugfs_list[i].data = vop2; + + drm_debugfs_create_files(vop2_debugfs_list, + ARRAY_SIZE(vop2_debugfs_list), + root, minor); + } +} + +static int vop2_crtc_late_register(struct drm_crtc *crtc) +{ + struct vop2_video_port *vp = to_vop2_video_port(crtc); + struct vop2 *vop2 = vp->vop2; + + if (drm_crtc_index(crtc) == 0) + vop2_debugfs_init(vop2, crtc->dev->primary); + + return 0; +} + static struct drm_crtc_state *vop2_crtc_duplicate_state(struct drm_crtc *crtc) { struct rockchip_crtc_state *vcstate; @@ -2562,6 +3070,7 @@ static const struct drm_crtc_funcs vop2_crtc_funcs = { .atomic_destroy_state = vop2_crtc_destroy_state, .enable_vblank = vop2_crtc_enable_vblank, .disable_vblank = vop2_crtc_disable_vblank, + .late_register = vop2_crtc_late_register, }; static irqreturn_t vop2_isr(int irq, void *data) @@ -2790,7 +3299,12 @@ static int vop2_create_crtcs(struct vop2 *vop2) } drm_crtc_helper_add(&vp->crtc, &vop2_crtc_helper_funcs); + if (vop2->lut_regs) { + const struct vop2_video_port_data *vp_data = &vop2_data->vp[vp->id]; + drm_mode_crtc_set_gamma_size(&vp->crtc, vp_data->gamma_lut_len); + drm_crtc_enable_color_mgmt(&vp->crtc, 0, false, vp_data->gamma_lut_len); + } init_completion(&vp->dsp_hold_completion); } @@ -2865,6 +3379,10 @@ static struct reg_field vop2_cluster_regs[VOP2_WIN_MAX_REG] = { [VOP2_WIN_Y2R_EN] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL0, 8, 8), [VOP2_WIN_R2Y_EN] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL0, 9, 9), [VOP2_WIN_CSC_MODE] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL0, 10, 11), + [VOP2_WIN_AXI_YRGB_R_ID] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL2, 0, 3), + [VOP2_WIN_AXI_UV_R_ID] = REG_FIELD(RK3568_CLUSTER_WIN_CTRL2, 5, 8), + /* RK3588 only, reserved bit on rk3568*/ + [VOP2_WIN_AXI_BUS_ID] = REG_FIELD(RK3568_CLUSTER_CTRL, 13, 13), /* Scale */ [VOP2_WIN_SCALE_YRGB_X] = REG_FIELD(RK3568_CLUSTER_WIN_SCL_FACTOR_YRGB, 0, 15), @@ -2957,6 +3475,10 @@ static struct reg_field vop2_esmart_regs[VOP2_WIN_MAX_REG] = { [VOP2_WIN_YMIRROR] = REG_FIELD(RK3568_SMART_CTRL1, 31, 31), [VOP2_WIN_COLOR_KEY] = REG_FIELD(RK3568_SMART_COLOR_KEY_CTRL, 0, 29), [VOP2_WIN_COLOR_KEY_EN] = REG_FIELD(RK3568_SMART_COLOR_KEY_CTRL, 31, 31), + [VOP2_WIN_AXI_YRGB_R_ID] = REG_FIELD(RK3568_SMART_CTRL1, 4, 8), + [VOP2_WIN_AXI_UV_R_ID] = REG_FIELD(RK3568_SMART_CTRL1, 12, 16), + /* RK3588 only, reserved register on rk3568 */ + [VOP2_WIN_AXI_BUS_ID] = REG_FIELD(RK3588_SMART_AXI_CTRL, 1, 1), /* Scale */ [VOP2_WIN_SCALE_YRGB_X] = 
REG_FIELD(RK3568_SMART_REGION0_SCL_FACTOR_YRGB, 0, 15), @@ -3106,6 +3628,7 @@ static int vop2_bind(struct device *dev, struct device *master, void *data) return -EINVAL; } + vop2->res = res; vop2->regs = devm_ioremap_resource(dev, res); if (IS_ERR(vop2->regs)) return PTR_ERR(vop2->regs); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h index 615a16196aff..29cc7fb8f6d8 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> */ @@ -9,6 +9,7 @@ #include <linux/regmap.h> #include <drm/drm_modes.h> +#include "rockchip_drm_drv.h" #include "rockchip_drm_vop.h" #define VOP2_VP_FEATURE_OUTPUT_10BIT BIT(0) @@ -78,6 +79,9 @@ enum vop2_win_regs { VOP2_WIN_COLOR_KEY, VOP2_WIN_COLOR_KEY_EN, VOP2_WIN_DITHER_UP, + VOP2_WIN_AXI_BUS_ID, + VOP2_WIN_AXI_YRGB_R_ID, + VOP2_WIN_AXI_UV_R_ID, /* scale regs */ VOP2_WIN_SCALE_YRGB_X, @@ -122,6 +126,15 @@ enum vop2_win_regs { VOP2_WIN_MAX_REG, }; +struct vop2_regs_dump { + const char *name; + u32 base; + u32 size; + u32 en_reg; + u32 en_val; + u32 en_mask; +}; + struct vop2_win_data { const char *name; unsigned int phys_id; @@ -140,6 +153,10 @@ struct vop2_win_data { unsigned int layer_sel_id; uint64_t feature; + uint8_t axi_bus_id; + uint8_t axi_yrgb_r_id; + uint8_t axi_uv_r_id; + unsigned int max_upscale_factor; unsigned int max_downscale_factor; const u8 dly[VOP2_DLY_MODE_MAX]; @@ -160,10 +177,12 @@ struct vop2_data { u64 feature; const struct vop2_win_data *win; const struct vop2_video_port_data *vp; + const struct vop2_regs_dump *regs_dump; struct vop_rect max_input; struct vop_rect max_output; unsigned int win_size; + unsigned int regs_dump_size; unsigned int soc_id; }; @@ -308,6 +327,7 @@ enum dst_factor_mode { #define RK3568_CLUSTER_WIN_CTRL0 0x00 #define RK3568_CLUSTER_WIN_CTRL1 0x04 +#define RK3568_CLUSTER_WIN_CTRL2 0x08 #define RK3568_CLUSTER_WIN_YRGB_MST 0x10 #define RK3568_CLUSTER_WIN_CBR_MST 0x14 #define RK3568_CLUSTER_WIN_VIR 0x18 @@ -330,6 +350,7 @@ enum dst_factor_mode { /* (E)smart register definition, offset relative to window base */ #define RK3568_SMART_CTRL0 0x00 #define RK3568_SMART_CTRL1 0x04 +#define RK3588_SMART_AXI_CTRL 0x08 #define RK3568_SMART_REGION0_CTRL 0x10 #define RK3568_SMART_REGION0_YRGB_MST 0x14 #define RK3568_SMART_REGION0_CBR_MST 0x18 @@ -394,6 +415,7 @@ enum dst_factor_mode { #define RK3568_REG_CFG_DONE__GLB_CFG_DONE_EN BIT(15) #define RK3568_VP_DSP_CTRL__STANDBY BIT(31) +#define RK3568_VP_DSP_CTRL__DSP_LUT_EN BIT(28) #define RK3568_VP_DSP_CTRL__DITHER_DOWN_MODE BIT(20) #define RK3568_VP_DSP_CTRL__DITHER_DOWN_SEL GENMASK(19, 18) #define RK3568_VP_DSP_CTRL__DITHER_DOWN_EN BIT(17) @@ -408,6 +430,8 @@ enum dst_factor_mode { #define RK3568_VP_DSP_CTRL__CORE_DCLK_DIV BIT(4) #define RK3568_VP_DSP_CTRL__OUT_MODE GENMASK(3, 0) +#define RK3588_VP_DSP_CTRL__GAMMA_UPDATE_EN BIT(22) + #define RK3588_VP_CLK_CTRL__DCLK_OUT_DIV GENMASK(3, 2) #define RK3588_VP_CLK_CTRL__DCLK_CORE_DIV GENMASK(1, 0) @@ -460,6 +484,8 @@ enum dst_factor_mode { #define RK3588_DSP_IF_POL__DP1_PIN_POL GENMASK(14, 12) #define RK3588_DSP_IF_POL__DP0_PIN_POL GENMASK(10, 8) +#define RK3588_LUT_PORT_SEL__GAMMA_AHB_WRITE_SEL GENMASK(13, 12) + #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2_PHASE_LOCK BIT(5) #define RK3568_VP0_MIPI_CTRL__DCLK_DIV2 BIT(4) diff --git 
a/drivers/gpu/drm/rockchip/rockchip_lvds.c b/drivers/gpu/drm/rockchip/rockchip_lvds.c index aba733736ff7..385cf6881504 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.c +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: * Mark Yao <mark.yao@rock-chips.com> * Sandy Huang <hjc@rock-chips.com> diff --git a/drivers/gpu/drm/rockchip/rockchip_lvds.h b/drivers/gpu/drm/rockchip/rockchip_lvds.h index 4ce967d23813..ca83d7b6bea7 100644 --- a/drivers/gpu/drm/rockchip/rockchip_lvds.h +++ b/drivers/gpu/drm/rockchip/rockchip_lvds.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: * Sandy Huang <hjc@rock-chips.com> * Mark Yao <mark.yao@rock-chips.com> diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c index dbfbde24698e..811020665120 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.c +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: * Sandy Huang <hjc@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.h b/drivers/gpu/drm/rockchip/rockchip_rgb.h index 1bd4e20e91eb..116f958b894d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_rgb.h +++ b/drivers/gpu/drm/rockchip/rockchip_rgb.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author: * Sandy Huang <hjc@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c index f9d87a0abc8b..65a88f489693 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop2_reg.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. 
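
[Illustration, not part of the patch] The gamma support added above (vop2_crtc_write_gamma_lut() together with the new RK3568_LUT_PORT_SEL and GAMMA_UPDATE_EN bits) packs each LUT entry into one LUT RAM word, with the per-component width taken from the port's LUT length. A short sketch of that packing; pack_lut_word() is a name made up here, drm_color_lut_extract() is the DRM color-management helper used by the patch:

#include <linux/log2.h>

#include <drm/drm_color_mgmt.h>
#include <uapi/drm/drm_mode.h>

static u32 pack_lut_word(const struct drm_color_lut *e, unsigned int lut_len)
{
	unsigned int bpc = ilog2(lut_len);	/* e.g. 10 for a 1024-entry LUT */

	/* blue in the top field, then green, then red in the low bits */
	return (drm_color_lut_extract(e->blue, bpc) << (2 * bpc)) |
	       (drm_color_lut_extract(e->green, bpc) << bpc) |
	       drm_color_lut_extract(e->red, bpc);
}
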
* Author: Andy Yan <andy.yan@rock-chips.com> */ @@ -258,6 +258,88 @@ static const struct vop2_win_data rk3568_vop_win_data[] = { }, }; +static const struct vop2_regs_dump rk3568_regs_dump[] = { + { + .name = "SYS", + .base = RK3568_REG_CFG_DONE, + .size = 0x100, + .en_reg = 0, + .en_val = 0, + .en_mask = 0 + }, { + .name = "OVL", + .base = RK3568_OVL_CTRL, + .size = 0x100, + .en_reg = 0, + .en_val = 0, + .en_mask = 0, + }, { + .name = "VP0", + .base = RK3568_VP0_CTRL_BASE, + .size = 0x100, + .en_reg = RK3568_VP_DSP_CTRL, + .en_val = 0, + .en_mask = RK3568_VP_DSP_CTRL__STANDBY, + }, { + .name = "VP1", + .base = RK3568_VP1_CTRL_BASE, + .size = 0x100, + .en_reg = RK3568_VP_DSP_CTRL, + .en_val = 0, + .en_mask = RK3568_VP_DSP_CTRL__STANDBY, + }, { + .name = "VP2", + .base = RK3568_VP2_CTRL_BASE, + .size = 0x100, + .en_reg = RK3568_VP_DSP_CTRL, + .en_val = 0, + .en_mask = RK3568_VP_DSP_CTRL__STANDBY, + + }, { + .name = "Cluster0", + .base = RK3568_CLUSTER0_CTRL_BASE, + .size = 0x110, + .en_reg = RK3568_CLUSTER_WIN_CTRL0, + .en_val = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + .en_mask = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + }, { + .name = "Cluster1", + .base = RK3568_CLUSTER1_CTRL_BASE, + .size = 0x110, + .en_reg = RK3568_CLUSTER_WIN_CTRL0, + .en_val = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + .en_mask = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + }, { + .name = "Esmart0", + .base = RK3568_ESMART0_CTRL_BASE, + .size = 0xf0, + .en_reg = RK3568_SMART_REGION0_CTRL, + .en_val = RK3568_SMART_REGION0_CTRL__WIN0_EN, + .en_mask = RK3568_SMART_REGION0_CTRL__WIN0_EN, + }, { + .name = "Esmart1", + .base = RK3568_ESMART1_CTRL_BASE, + .size = 0xf0, + .en_reg = RK3568_SMART_REGION0_CTRL, + .en_val = RK3568_SMART_REGION0_CTRL__WIN0_EN, + .en_mask = RK3568_SMART_REGION0_CTRL__WIN0_EN, + }, { + .name = "Smart0", + .base = RK3568_SMART0_CTRL_BASE, + .size = 0xf0, + .en_reg = RK3568_SMART_REGION0_CTRL, + .en_val = RK3568_SMART_REGION0_CTRL__WIN0_EN, + .en_mask = RK3568_SMART_REGION0_CTRL__WIN0_EN, + }, { + .name = "Smart1", + .base = RK3568_SMART1_CTRL_BASE, + .size = 0xf0, + .en_reg = RK3568_SMART_REGION0_CTRL, + .en_val = RK3568_SMART_REGION0_CTRL__WIN0_EN, + .en_mask = RK3568_SMART_REGION0_CTRL__WIN0_EN, + }, +}; + static const struct vop2_video_port_data rk3588_vop_video_ports[] = { { .id = 0, @@ -313,7 +395,7 @@ static const struct vop2_video_port_data rk3588_vop_video_ports[] = { * AXI1 is a read only bus. * * Every window on a AXI bus must assigned two unique - * read id(yrgb_id/uv_id, valid id are 0x1~0xe). + * read id(yrgb_r_id/uv_r_id, valid id are 0x1~0xe). 
* * AXI0: * Cluster0/1, Esmart0/1, WriteBack @@ -333,6 +415,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 0, .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, + .axi_bus_id = 0, + .axi_yrgb_r_id = 2, + .axi_uv_r_id = 3, .max_upscale_factor = 4, .max_downscale_factor = 4, .dly = { 4, 26, 29 }, @@ -349,6 +434,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_PRIMARY, + .axi_bus_id = 0, + .axi_yrgb_r_id = 6, + .axi_uv_r_id = 7, .max_upscale_factor = 4, .max_downscale_factor = 4, .dly = { 4, 26, 29 }, @@ -364,6 +452,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_PRIMARY, + .axi_bus_id = 1, + .axi_yrgb_r_id = 2, + .axi_uv_r_id = 3, .max_upscale_factor = 4, .max_downscale_factor = 4, .dly = { 4, 26, 29 }, @@ -379,6 +470,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .supported_rotations = DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_270 | DRM_MODE_REFLECT_X | DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_PRIMARY, + .axi_bus_id = 1, + .axi_yrgb_r_id = 6, + .axi_uv_r_id = 7, .max_upscale_factor = 4, .max_downscale_factor = 4, .dly = { 4, 26, 29 }, @@ -393,6 +487,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 2, .supported_rotations = DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_OVERLAY, + .axi_bus_id = 0, + .axi_yrgb_r_id = 0x0a, + .axi_uv_r_id = 0x0b, .max_upscale_factor = 8, .max_downscale_factor = 8, .dly = { 23, 45, 48 }, @@ -406,6 +503,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 3, .supported_rotations = DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_OVERLAY, + .axi_bus_id = 0, + .axi_yrgb_r_id = 0x0c, + .axi_uv_r_id = 0x01, .max_upscale_factor = 8, .max_downscale_factor = 8, .dly = { 23, 45, 48 }, @@ -419,6 +519,9 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 6, .supported_rotations = DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_OVERLAY, + .axi_bus_id = 1, + .axi_yrgb_r_id = 0x0a, + .axi_uv_r_id = 0x0b, .max_upscale_factor = 8, .max_downscale_factor = 8, .dly = { 23, 45, 48 }, @@ -432,12 +535,118 @@ static const struct vop2_win_data rk3588_vop_win_data[] = { .layer_sel_id = 7, .supported_rotations = DRM_MODE_REFLECT_Y, .type = DRM_PLANE_TYPE_OVERLAY, + .axi_bus_id = 1, + .axi_yrgb_r_id = 0x0c, + .axi_uv_r_id = 0x0d, .max_upscale_factor = 8, .max_downscale_factor = 8, .dly = { 23, 45, 48 }, }, }; +static const struct vop2_regs_dump rk3588_regs_dump[] = { + { + .name = "SYS", + .base = RK3568_REG_CFG_DONE, + .size = 0x100, + .en_reg = 0, + .en_val = 0, + .en_mask = 0 + }, { + .name = "OVL", + .base = RK3568_OVL_CTRL, + .size = 0x100, + .en_reg = 0, + .en_val = 0, + .en_mask = 0, + }, { + .name = "VP0", + .base = RK3568_VP0_CTRL_BASE, + .size = 0x100, + .en_reg = RK3568_VP_DSP_CTRL, + .en_val = 0, + .en_mask = RK3568_VP_DSP_CTRL__STANDBY, + }, { + .name = "VP1", + .base = RK3568_VP1_CTRL_BASE, + .size = 0x100, + .en_reg = RK3568_VP_DSP_CTRL, + .en_val = 0, + .en_mask = RK3568_VP_DSP_CTRL__STANDBY, + }, { + .name = "VP2", + .base = RK3568_VP2_CTRL_BASE, + .size = 0x100, + .en_reg = RK3568_VP_DSP_CTRL, + .en_val = 0, + .en_mask = RK3568_VP_DSP_CTRL__STANDBY, + + }, { + .name = "VP3", + .base = RK3588_VP3_CTRL_BASE, + 
.size = 0x100, + .en_reg = RK3568_VP_DSP_CTRL, + .en_val = 0, + .en_mask = RK3568_VP_DSP_CTRL__STANDBY, + }, { + .name = "Cluster0", + .base = RK3568_CLUSTER0_CTRL_BASE, + .size = 0x110, + .en_reg = RK3568_CLUSTER_WIN_CTRL0, + .en_val = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + .en_mask = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + }, { + .name = "Cluster1", + .base = RK3568_CLUSTER1_CTRL_BASE, + .size = 0x110, + .en_reg = RK3568_CLUSTER_WIN_CTRL0, + .en_val = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + .en_mask = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + }, { + .name = "Cluster2", + .base = RK3588_CLUSTER2_CTRL_BASE, + .size = 0x110, + .en_reg = RK3568_CLUSTER_WIN_CTRL0, + .en_val = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + .en_mask = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + }, { + .name = "Cluster3", + .base = RK3588_CLUSTER3_CTRL_BASE, + .size = 0x110, + .en_reg = RK3568_CLUSTER_WIN_CTRL0, + .en_val = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + .en_mask = RK3568_CLUSTER_WIN_CTRL0__WIN0_EN, + }, { + .name = "Esmart0", + .base = RK3568_ESMART0_CTRL_BASE, + .size = 0xf0, + .en_reg = RK3568_SMART_REGION0_CTRL, + .en_val = RK3568_SMART_REGION0_CTRL__WIN0_EN, + .en_mask = RK3568_SMART_REGION0_CTRL__WIN0_EN, + }, { + .name = "Esmart1", + .base = RK3568_ESMART1_CTRL_BASE, + .size = 0xf0, + .en_reg = RK3568_SMART_REGION0_CTRL, + .en_val = RK3568_SMART_REGION0_CTRL__WIN0_EN, + .en_mask = RK3568_SMART_REGION0_CTRL__WIN0_EN, + }, { + .name = "Esmart2", + .base = RK3588_ESMART2_CTRL_BASE, + .size = 0xf0, + .en_reg = RK3568_SMART_REGION0_CTRL, + .en_val = RK3568_SMART_REGION0_CTRL__WIN0_EN, + .en_mask = RK3568_SMART_REGION0_CTRL__WIN0_EN, + }, { + .name = "Esmart3", + .base = RK3588_ESMART3_CTRL_BASE, + .size = 0xf0, + .en_reg = RK3568_SMART_REGION0_CTRL, + .en_val = RK3568_SMART_REGION0_CTRL__WIN0_EN, + .en_mask = RK3568_SMART_REGION0_CTRL__WIN0_EN, + }, +}; + static const struct vop2_data rk3566_vop = { .feature = VOP2_FEATURE_HAS_SYS_GRF, .nr_vps = 3, @@ -446,6 +655,8 @@ static const struct vop2_data rk3566_vop = { .vp = rk3568_vop_video_ports, .win = rk3568_vop_win_data, .win_size = ARRAY_SIZE(rk3568_vop_win_data), + .regs_dump = rk3568_regs_dump, + .regs_dump_size = ARRAY_SIZE(rk3568_regs_dump), .soc_id = 3566, }; @@ -457,6 +668,8 @@ static const struct vop2_data rk3568_vop = { .vp = rk3568_vop_video_ports, .win = rk3568_vop_win_data, .win_size = ARRAY_SIZE(rk3568_vop_win_data), + .regs_dump = rk3568_regs_dump, + .regs_dump_size = ARRAY_SIZE(rk3568_regs_dump), .soc_id = 3568, }; @@ -469,6 +682,8 @@ static const struct vop2_data rk3588_vop = { .vp = rk3588_vop_video_ports, .win = rk3588_vop_win_data, .win_size = ARRAY_SIZE(rk3588_vop_win_data), + .regs_dump = rk3588_regs_dump, + .regs_dump_size = ARRAY_SIZE(rk3588_regs_dump), .soc_id = 3588, }; diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c index 8998967f0c00..4e2099d86517 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. 
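
[Illustration, not part of the patch] The en_reg/en_val/en_mask triple in these dump descriptors tells the debugfs code whether a block should be printed when only active blocks are requested: an all-zero mask means "always dump" (SYS, OVL), otherwise the enable register is read and the masked value compared against en_val (for the video ports that means STANDBY cleared). A small sketch of that check; regs_dump_block_active() and the read_reg callback are illustrative names, the descriptor type is the one added in rockchip_drm_vop2.h:

#include <linux/types.h>

#include "rockchip_drm_vop2.h"	/* struct vop2_regs_dump */

static bool regs_dump_block_active(const struct vop2_regs_dump *dump,
				   u32 (*read_reg)(void *ctx, u32 offset),
				   void *ctx)
{
	if (!dump->en_mask)
		return true;	/* SYS/OVL style entries: always dumped */

	return (read_reg(ctx, dump->base + dump->en_reg) & dump->en_mask) ==
	       dump->en_val;
}
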
* Author:Mark Yao <mark.yao@rock-chips.com> */ diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.h b/drivers/gpu/drm/rockchip/rockchip_vop_reg.h index fbf1bcc68625..addf8ca085f6 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.h +++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd + * Copyright (C) Rockchip Electronics Co., Ltd. * Author:Mark Yao <mark.yao@rock-chips.com> */ diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c index 486d8f5282f9..b777690fd660 100644 --- a/drivers/gpu/drm/solomon/ssd130x.c +++ b/drivers/gpu/drm/solomon/ssd130x.c @@ -18,9 +18,9 @@ #include <linux/pwm.h> #include <linux/regulator/consumer.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_edid.h> @@ -39,7 +39,6 @@ #define DRIVER_NAME "ssd130x" #define DRIVER_DESC "DRM driver for Solomon SSD13xx OLED displays" -#define DRIVER_DATE "20220131" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -1784,7 +1783,6 @@ static const struct drm_driver ssd130x_drm_driver = { DRM_FBDEV_SHMEM_DRIVER_OPS, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .driver_features = DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET, diff --git a/drivers/gpu/drm/sprd/sprd_drm.c b/drivers/gpu/drm/sprd/sprd_drm.c index bc1c747d3ea4..ceacdcb7c566 100644 --- a/drivers/gpu/drm/sprd/sprd_drm.c +++ b/drivers/gpu/drm/sprd/sprd_drm.c @@ -23,7 +23,6 @@ #define DRIVER_NAME "sprd" #define DRIVER_DESC "Spreadtrum SoCs' DRM Driver" -#define DRIVER_DATE "20200201" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -59,7 +58,6 @@ static struct drm_driver sprd_drm_drv = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, }; diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c index 61ceff9aee7e..5e9332df21df 100644 --- a/drivers/gpu/drm/sti/sti_drv.c +++ b/drivers/gpu/drm/sti/sti_drv.c @@ -13,9 +13,9 @@ #include <linux/of_platform.h> #include <linux/platform_device.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> @@ -29,7 +29,6 @@ #define DRIVER_NAME "sti" #define DRIVER_DESC "STMicroelectronics SoC DRM" -#define DRIVER_DATE "20140601" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -143,7 +142,6 @@ static const struct drm_driver sti_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, }; diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index 21b46a6465f0..f8bbae6393ef 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -1225,7 +1225,9 @@ static int hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, size struct drm_connector *connector = hdmi->drm_connector; DRM_DEBUG_DRIVER("\n"); + mutex_lock(&connector->eld_mutex); memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); + mutex_unlock(&connector->eld_mutex); return 0; } diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c index bf090a354989..8ebcaf953782 100644 --- a/drivers/gpu/drm/stm/drv.c +++ 
b/drivers/gpu/drm/stm/drv.c @@ -16,9 +16,9 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_fourcc.h> @@ -62,7 +62,6 @@ static const struct drm_driver drv_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, .name = "stm", .desc = "STMicroelectronics SoC DRM", - .date = "20170330", .major = 1, .minor = 0, .patchlevel = 0, diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c index 5eccf58f2e17..c11dfb2739fa 100644 --- a/drivers/gpu/drm/sun4i/sun4i_drv.c +++ b/drivers/gpu/drm/sun4i/sun4i_drv.c @@ -15,8 +15,8 @@ #include <linux/of_reserved_mem.h> #include <linux/platform_device.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_dma_helper.h> @@ -50,7 +50,6 @@ static const struct drm_driver sun4i_drv_driver = { .fops = &sun4i_drv_fops, .name = "sun4i-drm", .desc = "Allwinner sun4i Display Engine", - .date = "20150629", .major = 1, .minor = 0, diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c index 453f19f16ab7..ab0938ba61f7 100644 --- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c +++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c @@ -187,34 +187,6 @@ sun4i_hdmi_connector_clock_valid(const struct drm_connector *connector, return MODE_NOCLOCK; } -static int sun4i_hdmi_connector_atomic_check(struct drm_connector *connector, - struct drm_atomic_state *state) -{ - struct drm_connector_state *conn_state = - drm_atomic_get_new_connector_state(state, connector); - struct drm_crtc *crtc = conn_state->crtc; - struct drm_crtc_state *crtc_state = crtc->state; - struct drm_display_mode *mode = &crtc_state->adjusted_mode; - enum drm_mode_status status; - - status = sun4i_hdmi_connector_clock_valid(connector, mode, - conn_state->hdmi.tmds_char_rate); - if (status != MODE_OK) - return -EINVAL; - - return 0; -} - -static enum drm_mode_status -sun4i_hdmi_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) -{ - unsigned long long rate = drm_hdmi_compute_mode_clock(mode, 8, - HDMI_COLORSPACE_RGB); - - return sun4i_hdmi_connector_clock_valid(connector, mode, rate); -} - static int sun4i_hdmi_get_modes(struct drm_connector *connector) { struct sun4i_hdmi *hdmi = drm_connector_to_sun4i_hdmi(connector); @@ -268,8 +240,8 @@ static const struct drm_connector_hdmi_funcs sun4i_hdmi_hdmi_connector_funcs = { }; static const struct drm_connector_helper_funcs sun4i_hdmi_connector_helper_funcs = { - .atomic_check = sun4i_hdmi_connector_atomic_check, - .mode_valid = sun4i_hdmi_connector_mode_valid, + .atomic_check = drm_atomic_helper_connector_hdmi_check, + .mode_valid = drm_hdmi_connector_mode_valid, .get_modes = sun4i_hdmi_get_modes, }; diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index bf3421667ecc..4596073fe28f 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -13,9 +13,9 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> #include <drm/drm_fourcc.h> @@ -35,7 +35,6 @@ #define 
DRIVER_NAME "tegra" #define DRIVER_DESC "NVIDIA Tegra graphics" -#define DRIVER_DATE "20120330" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 @@ -901,7 +900,6 @@ static const struct drm_driver tegra_drm_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c index 6bba97d0be88..129e813cfd1b 100644 --- a/drivers/gpu/drm/tests/drm_connector_test.c +++ b/drivers/gpu/drm/tests/drm_connector_test.c @@ -9,6 +9,7 @@ #include <drm/drm_connector.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> +#include <drm/drm_file.h> #include <drm/drm_kunit_helpers.h> #include <drm/drm_modes.h> @@ -181,6 +182,465 @@ static struct kunit_suite drmm_connector_init_test_suite = { .test_cases = drmm_connector_init_tests, }; +static const struct drm_connector_funcs dummy_dynamic_init_funcs = { + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .reset = drm_atomic_helper_connector_reset, + .destroy = drm_connector_cleanup, +}; + +/* + * Test that the initialization of a bog standard dynamic connector works + * as expected and doesn't report any error. + */ +static void drm_test_drm_connector_dynamic_init(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + int ret; + + ret = drm_connector_dynamic_init(&priv->drm, connector, + &dummy_dynamic_init_funcs, + DRM_MODE_CONNECTOR_DisplayPort, + &priv->ddc); + KUNIT_ASSERT_EQ(test, ret, 0); +} + +static void drm_test_connector_dynamic_init_cleanup(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + + drm_connector_cleanup(connector); +} + +/* + * Test that the initialization of a dynamic connector without a DDC adapter + * doesn't report any error. + */ +static void drm_test_drm_connector_dynamic_init_null_ddc(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + int ret; + + ret = drm_connector_dynamic_init(&priv->drm, connector, + &dummy_dynamic_init_funcs, + DRM_MODE_CONNECTOR_DisplayPort, + NULL); + KUNIT_ASSERT_EQ(test, ret, 0); +} + +/* + * Test that the initialization of a dynamic connector doesn't add the + * connector to the connector list. 
+ */ +static void drm_test_drm_connector_dynamic_init_not_added(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + int ret; + + ret = drm_connector_dynamic_init(&priv->drm, connector, + &dummy_dynamic_init_funcs, + DRM_MODE_CONNECTOR_DisplayPort, + &priv->ddc); + KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_PTR_EQ(test, connector->head.next, &connector->head); +} + +static void test_connector_property(struct kunit *test, + struct drm_connector *connector, + const struct drm_property *expected_prop) +{ + struct drm_property *prop; + uint64_t val; + int ret; + + KUNIT_ASSERT_NOT_NULL(test, expected_prop); + prop = drm_mode_obj_find_prop_id(&connector->base, expected_prop->base.id); + KUNIT_ASSERT_PTR_EQ_MSG(test, prop, expected_prop, + "Can't find property %s", expected_prop->name); + + ret = drm_object_property_get_default_value(&connector->base, prop, &val); + KUNIT_EXPECT_EQ(test, ret, 0); + KUNIT_EXPECT_EQ(test, val, 0); + + /* TODO: Check property value in the connector state. */ +} + +/* + * Test that the initialization of a dynamic connector adds all the expected + * properties to it. + */ +static void drm_test_drm_connector_dynamic_init_properties(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + struct drm_mode_config *config = &priv->drm.mode_config; + const struct drm_property *props[] = { + config->edid_property, + config->dpms_property, + config->link_status_property, + config->non_desktop_property, + config->tile_property, + config->prop_crtc_id, + }; + int ret; + int i; + + ret = drm_connector_dynamic_init(&priv->drm, connector, + &dummy_dynamic_init_funcs, + DRM_MODE_CONNECTOR_DisplayPort, + &priv->ddc); + KUNIT_ASSERT_EQ(test, ret, 0); + + for (i = 0; i < ARRAY_SIZE(props); i++) + test_connector_property(test, connector, props[i]); +} + +/* + * Test that the initialization of a dynamic connector succeeds for all + * possible connector types. + */ +static void drm_test_drm_connector_dynamic_init_type_valid(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + unsigned int connector_type = *(unsigned int *)test->param_value; + int ret; + + ret = drm_connector_dynamic_init(&priv->drm, connector, + &dummy_dynamic_init_funcs, + connector_type, + &priv->ddc); + KUNIT_ASSERT_EQ(test, ret, 0); +} + +/* + * Test that the initialization of a dynamic connector sets the expected name + * for it for all possible connector types. 
+ */ +static void drm_test_drm_connector_dynamic_init_name(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + unsigned int connector_type = *(unsigned int *)test->param_value; + char expected_name[128]; + int ret; + + ret = drm_connector_dynamic_init(&priv->drm, connector, + &dummy_dynamic_init_funcs, + connector_type, + &priv->ddc); + KUNIT_ASSERT_EQ(test, ret, 0); + + snprintf(expected_name, sizeof(expected_name), "%s-%d", + drm_get_connector_type_name(connector_type), connector->connector_type_id); + KUNIT_ASSERT_STREQ(test, connector->name, expected_name); +} + +static struct kunit_case drm_connector_dynamic_init_tests[] = { + KUNIT_CASE(drm_test_drm_connector_dynamic_init), + KUNIT_CASE(drm_test_drm_connector_dynamic_init_null_ddc), + KUNIT_CASE(drm_test_drm_connector_dynamic_init_not_added), + KUNIT_CASE(drm_test_drm_connector_dynamic_init_properties), + KUNIT_CASE_PARAM(drm_test_drm_connector_dynamic_init_type_valid, + drm_connector_init_type_valid_gen_params), + KUNIT_CASE_PARAM(drm_test_drm_connector_dynamic_init_name, + drm_connector_init_type_valid_gen_params), + {} +}; + +static struct kunit_suite drm_connector_dynamic_init_test_suite = { + .name = "drm_connector_dynamic_init", + .init = drm_test_connector_init, + .exit = drm_test_connector_dynamic_init_cleanup, + .test_cases = drm_connector_dynamic_init_tests, +}; + +static int drm_test_connector_dynamic_register_early_init(struct kunit *test) +{ + struct drm_connector_init_priv *priv; + int ret; + + ret = drm_test_connector_init(test); + KUNIT_ASSERT_EQ(test, ret, 0); + + priv = test->priv; + + ret = drm_connector_dynamic_init(&priv->drm, &priv->connector, + &dummy_dynamic_init_funcs, + DRM_MODE_CONNECTOR_DisplayPort, + &priv->ddc); + KUNIT_ASSERT_EQ(test, ret, 0); + + return 0; +} + +static void drm_test_connector_dynamic_register_early_cleanup(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + + drm_connector_unregister(connector); + drm_connector_put(connector); +} + +/* + * Test that registration of a dynamic connector adds it to the connector list. + */ +static void drm_test_drm_connector_dynamic_register_early_on_list(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + int ret; + + KUNIT_ASSERT_TRUE(test, list_empty(&connector->head)); + + ret = drm_connector_dynamic_register(connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_ASSERT_PTR_EQ(test, connector->head.next, &priv->drm.mode_config.connector_list); +} + +/* + * Test that the registration of a dynamic connector before the drm device is + * registered results in deferring the connector's user interface registration. + */ +static void drm_test_drm_connector_dynamic_register_early_defer(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + int ret; + + ret = drm_connector_dynamic_register(connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_ASSERT_EQ(test, connector->registration_state, DRM_CONNECTOR_INITIALIZING); +} + +/* + * Test that the registration of a dynamic connector fails, if this is done before + * the connector is initialized. 
+ */ +static void drm_test_drm_connector_dynamic_register_early_no_init(struct kunit *test) +{ + struct drm_connector *connector; + int ret; + + connector = kunit_kzalloc(test, sizeof(*connector), GFP_KERNEL); /* auto freed */ + KUNIT_ASSERT_NOT_NULL(test, connector); + + ret = drm_connector_dynamic_register(connector); + KUNIT_ASSERT_EQ(test, ret, -EINVAL); +} + +/* + * Test that the registration of a dynamic connector before the drm device is + * registered results in deferring adding a mode object for the connector. + */ +static void drm_test_drm_connector_dynamic_register_early_no_mode_object(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + struct drm_connector *tmp_connector; + int ret; + + ret = drm_connector_dynamic_register(&priv->connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + tmp_connector = drm_connector_lookup(connector->dev, NULL, connector->base.id); + KUNIT_ASSERT_NULL(test, tmp_connector); +} + +static struct kunit_case drm_connector_dynamic_register_early_tests[] = { + KUNIT_CASE(drm_test_drm_connector_dynamic_register_early_on_list), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_early_defer), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_early_no_init), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_early_no_mode_object), + { } +}; + +static struct kunit_suite drm_connector_dynamic_register_early_test_suite = { + .name = "drm_connector_dynamic_register_early", + .init = drm_test_connector_dynamic_register_early_init, + .exit = drm_test_connector_dynamic_register_early_cleanup, + .test_cases = drm_connector_dynamic_register_early_tests, +}; + +static int drm_test_connector_dynamic_register_init(struct kunit *test) +{ + struct drm_connector_init_priv *priv; + int ret; + + ret = drm_test_connector_dynamic_register_early_init(test); + KUNIT_ASSERT_EQ(test, ret, 0); + + priv = test->priv; + + ret = drm_dev_register(priv->connector.dev, 0); + KUNIT_ASSERT_EQ(test, ret, 0); + + return 0; +} + +static void drm_test_connector_dynamic_register_cleanup(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_device *dev = priv->connector.dev; + + drm_connector_unregister(&priv->connector); + drm_connector_put(&priv->connector); + + drm_dev_unregister(dev); + + drm_test_connector_dynamic_register_early_cleanup(test); +} + +static void drm_test_drm_connector_dynamic_register_on_list(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + int ret; + + KUNIT_ASSERT_TRUE(test, list_empty(&priv->connector.head)); + + ret = drm_connector_dynamic_register(&priv->connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_ASSERT_PTR_EQ(test, priv->connector.head.next, &priv->drm.mode_config.connector_list); +} + +/* + * Test that the registration of a dynamic connector doesn't get deferred if + * this is done after the drm device is registered. + */ +static void drm_test_drm_connector_dynamic_register_no_defer(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + int ret; + + KUNIT_ASSERT_EQ(test, priv->connector.registration_state, DRM_CONNECTOR_INITIALIZING); + + ret = drm_connector_dynamic_register(&priv->connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_ASSERT_EQ(test, priv->connector.registration_state, DRM_CONNECTOR_REGISTERED); +} + +/* + * Test that the registration of a dynamic connector fails if this is done after the + * drm device is registered, but before the connector is initialized. 
+ */ +static void drm_test_drm_connector_dynamic_register_no_init(struct kunit *test) +{ + struct drm_connector *connector; + int ret; + + connector = kunit_kzalloc(test, sizeof(*connector), GFP_KERNEL); /* auto freed */ + KUNIT_ASSERT_NOT_NULL(test, connector); + + ret = drm_connector_dynamic_register(connector); + KUNIT_ASSERT_EQ(test, ret, -EINVAL); +} + +/* + * Test that the registration of a dynamic connector after the drm device is + * registered adds the mode object for the connector. + */ +static void drm_test_drm_connector_dynamic_register_mode_object(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + struct drm_connector *tmp_connector; + int ret; + + tmp_connector = drm_connector_lookup(connector->dev, NULL, connector->base.id); + KUNIT_ASSERT_NULL(test, tmp_connector); + + ret = drm_connector_dynamic_register(&priv->connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + tmp_connector = drm_connector_lookup(connector->dev, NULL, connector->base.id); + KUNIT_ASSERT_PTR_EQ(test, tmp_connector, connector); +} + +/* + * Test that the registration of a dynamic connector after the drm device is + * registered adds the connector to sysfs. + */ +static void drm_test_drm_connector_dynamic_register_sysfs(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + int ret; + + KUNIT_ASSERT_NULL(test, connector->kdev); + + ret = drm_connector_dynamic_register(connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + KUNIT_ASSERT_NOT_NULL(test, connector->kdev); +} + +/* + * Test that the registration of a dynamic connector after the drm device is + * registered sets the connector's sysfs name as expected. + */ +static void drm_test_drm_connector_dynamic_register_sysfs_name(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + struct drm_connector *connector = &priv->connector; + char expected_name[128]; + int ret; + + ret = drm_connector_dynamic_register(connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + snprintf(expected_name, sizeof(expected_name), "card%d-%s", + connector->dev->primary->index, connector->name); + + KUNIT_ASSERT_STREQ(test, dev_name(connector->kdev), expected_name); +} + +/* + * Test that the registration of a dynamic connector after the drm device is + * registered adds the connector to debugfs. 
+ */ +static void drm_test_drm_connector_dynamic_register_debugfs(struct kunit *test) +{ + struct drm_connector_init_priv *priv = test->priv; + int ret; + + KUNIT_ASSERT_NULL(test, priv->connector.debugfs_entry); + + ret = drm_connector_dynamic_register(&priv->connector); + KUNIT_ASSERT_EQ(test, ret, 0); + + if (IS_ENABLED(CONFIG_DEBUG_FS)) + KUNIT_ASSERT_NOT_NULL(test, priv->connector.debugfs_entry); + else + KUNIT_ASSERT_NULL(test, priv->connector.debugfs_entry); +} + +static struct kunit_case drm_connector_dynamic_register_tests[] = { + KUNIT_CASE(drm_test_drm_connector_dynamic_register_on_list), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_no_defer), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_no_init), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_mode_object), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_sysfs), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_sysfs_name), + KUNIT_CASE(drm_test_drm_connector_dynamic_register_debugfs), + { } +}; + +static struct kunit_suite drm_connector_dynamic_register_test_suite = { + .name = "drm_connector_dynamic_register", + .init = drm_test_connector_dynamic_register_init, + .exit = drm_test_connector_dynamic_register_cleanup, + .test_cases = drm_connector_dynamic_register_tests, +}; + /* * Test that the registration of a bog standard connector works as * expected and doesn't report any error. @@ -1283,6 +1743,9 @@ static struct kunit_suite drm_hdmi_compute_mode_clock_test_suite = { kunit_test_suites( &drmm_connector_hdmi_init_test_suite, &drmm_connector_init_test_suite, + &drm_connector_dynamic_init_test_suite, + &drm_connector_dynamic_register_early_test_suite, + &drm_connector_dynamic_register_test_suite, &drm_connector_attach_broadcast_rgb_property_test_suite, &drm_get_tv_mode_from_name_test_suite, &drm_hdmi_compute_mode_clock_test_suite, diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index 294773342e71..c3b693bb966f 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -46,7 +46,7 @@ static struct drm_display_mode *find_preferred_mode(struct drm_connector *connec struct drm_display_mode *mode, *preferred; mutex_lock(&drm->mode_config.mutex); - preferred = list_first_entry(&connector->modes, struct drm_display_mode, head); + preferred = list_first_entry_or_null(&connector->modes, struct drm_display_mode, head); list_for_each_entry(mode, &connector->modes, head) if (mode->type & DRM_MODE_TYPE_PREFERRED) preferred = mode; @@ -105,9 +105,8 @@ static int set_connector_edid(struct kunit *test, struct drm_connector *connecto mutex_lock(&drm->mode_config.mutex); ret = connector->funcs->fill_modes(connector, 4096, 4096); mutex_unlock(&drm->mode_config.mutex); - KUNIT_ASSERT_GT(test, ret, 0); - return 0; + return ret; } static const struct drm_connector_hdmi_funcs dummy_connector_hdmi_funcs = { @@ -125,6 +124,18 @@ static const struct drm_connector_hdmi_funcs reject_connector_hdmi_funcs = { .tmds_char_rate_valid = reject_connector_tmds_char_rate_valid, }; +static enum drm_mode_status +reject_100MHz_connector_tmds_char_rate_valid(const struct drm_connector *connector, + const struct drm_display_mode *mode, + unsigned long long tmds_rate) +{ + return (tmds_rate > 100ULL * 1000 * 1000) ? 
MODE_BAD : MODE_OK; +} + +static const struct drm_connector_hdmi_funcs reject_100_MHz_connector_hdmi_funcs = { + .tmds_char_rate_valid = reject_100MHz_connector_tmds_char_rate_valid, +}; + static int dummy_connector_get_modes(struct drm_connector *connector) { struct drm_atomic_helper_connector_hdmi_priv *priv = @@ -147,6 +158,7 @@ static int dummy_connector_get_modes(struct drm_connector *connector) static const struct drm_connector_helper_funcs dummy_connector_helper_funcs = { .atomic_check = drm_atomic_helper_connector_hdmi_check, .get_modes = dummy_connector_get_modes, + .mode_valid = drm_hdmi_connector_mode_valid, }; static void dummy_hdmi_connector_reset(struct drm_connector *connector) @@ -164,9 +176,10 @@ static const struct drm_connector_funcs dummy_connector_funcs = { static struct drm_atomic_helper_connector_hdmi_priv * -drm_atomic_helper_connector_hdmi_init(struct kunit *test, - unsigned int formats, - unsigned int max_bpc) +drm_kunit_helper_connector_hdmi_init_funcs(struct kunit *test, + unsigned int formats, + unsigned int max_bpc, + const struct drm_connector_hdmi_funcs *hdmi_funcs) { struct drm_atomic_helper_connector_hdmi_priv *priv; struct drm_connector *conn; @@ -208,7 +221,7 @@ drm_atomic_helper_connector_hdmi_init(struct kunit *test, ret = drmm_connector_hdmi_init(drm, conn, "Vendor", "Product", &dummy_connector_funcs, - &dummy_connector_hdmi_funcs, + hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, NULL, formats, @@ -220,10 +233,27 @@ drm_atomic_helper_connector_hdmi_init(struct kunit *test, drm_mode_config_reset(drm); - ret = set_connector_edid(test, conn, + return priv; +} + +static +struct drm_atomic_helper_connector_hdmi_priv * +drm_kunit_helper_connector_hdmi_init(struct kunit *test, + unsigned int formats, + unsigned int max_bpc) +{ + struct drm_atomic_helper_connector_hdmi_priv *priv; + int ret; + + priv = drm_kunit_helper_connector_hdmi_init_funcs(test, + formats, max_bpc, + &dummy_connector_hdmi_funcs); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv); + + ret = set_connector_edid(test, &priv->connector, test_edid_hdmi_1080p_rgb_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); return priv; } @@ -247,9 +277,9 @@ static void drm_test_check_broadcast_rgb_crtc_mode_changed(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); ctx = drm_kunit_helper_acquire_ctx_alloc(test); @@ -310,9 +340,9 @@ static void drm_test_check_broadcast_rgb_crtc_mode_not_changed(struct kunit *tes struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); ctx = drm_kunit_helper_acquire_ctx_alloc(test); @@ -373,9 +403,9 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -429,9 +459,9 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode_vic_1(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - 
BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); drm = &priv->drm; @@ -485,9 +515,9 @@ static void drm_test_check_broadcast_rgb_full_cea_mode(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -543,9 +573,9 @@ static void drm_test_check_broadcast_rgb_full_cea_mode_vic_1(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); drm = &priv->drm; @@ -601,9 +631,9 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -659,9 +689,9 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode_vic_1(struct kunit *te struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); drm = &priv->drm; @@ -719,16 +749,16 @@ static void drm_test_check_output_bpc_crtc_mode_changed(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 10); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 10); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); ctx = drm_kunit_helper_acquire_ctx_alloc(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); @@ -793,16 +823,16 @@ static void drm_test_check_output_bpc_crtc_mode_not_changed(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 10); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 10); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); ctx = drm_kunit_helper_acquire_ctx_alloc(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); @@ -862,18 +892,18 @@ static void drm_test_check_output_bpc_dvi(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB) | - BIT(HDMI_COLORSPACE_YUV422) | - BIT(HDMI_COLORSPACE_YUV444), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB) | + BIT(HDMI_COLORSPACE_YUV422) | + BIT(HDMI_COLORSPACE_YUV444), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_dvi_1080p, ARRAY_SIZE(test_edid_dvi_1080p)); - KUNIT_ASSERT_EQ(test, ret, 0); + 
KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_FALSE(test, info->is_hdmi); @@ -911,16 +941,16 @@ static void drm_test_check_tmds_char_rate_rgb_8bpc(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); ctx = drm_kunit_helper_acquire_ctx_alloc(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); @@ -958,16 +988,16 @@ static void drm_test_check_tmds_char_rate_rgb_10bpc(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 10); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 10); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); ctx = drm_kunit_helper_acquire_ctx_alloc(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); @@ -1005,16 +1035,16 @@ static void drm_test_check_tmds_char_rate_rgb_12bpc(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); ctx = drm_kunit_helper_acquire_ctx_alloc(test); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx); @@ -1056,9 +1086,9 @@ static void drm_test_check_hdmi_funcs_reject_rate(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); ctx = drm_kunit_helper_acquire_ctx_alloc(test); @@ -1112,16 +1142,16 @@ static void drm_test_check_max_tmds_rate_bpc_fallback(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); @@ -1179,18 +1209,18 @@ static void drm_test_check_max_tmds_rate_format_fallback(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB) | - BIT(HDMI_COLORSPACE_YUV422) | - BIT(HDMI_COLORSPACE_YUV444), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB) | + BIT(HDMI_COLORSPACE_YUV422) | + BIT(HDMI_COLORSPACE_YUV444), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = 
&priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); @@ -1242,11 +1272,11 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB) | - BIT(HDMI_COLORSPACE_YUV422) | - BIT(HDMI_COLORSPACE_YUV444), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB) | + BIT(HDMI_COLORSPACE_YUV422) | + BIT(HDMI_COLORSPACE_YUV444), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); drm = &priv->drm; @@ -1254,7 +1284,7 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test) ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); @@ -1305,16 +1335,16 @@ static void drm_test_check_output_bpc_format_driver_rgb_only(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); @@ -1370,18 +1400,18 @@ static void drm_test_check_output_bpc_format_display_rgb_only(struct kunit *test struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB) | - BIT(HDMI_COLORSPACE_YUV422) | - BIT(HDMI_COLORSPACE_YUV444), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB) | + BIT(HDMI_COLORSPACE_YUV422) | + BIT(HDMI_COLORSPACE_YUV444), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_200mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); @@ -1438,16 +1468,16 @@ static void drm_test_check_output_bpc_format_driver_8bpc_only(struct kunit *test struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_yuv_dc_max_340mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); @@ -1496,18 +1526,18 @@ static void drm_test_check_output_bpc_format_display_8bpc_only(struct kunit *tes struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB) | - BIT(HDMI_COLORSPACE_YUV422) | - BIT(HDMI_COLORSPACE_YUV444), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB) | + 
BIT(HDMI_COLORSPACE_YUV422) | + BIT(HDMI_COLORSPACE_YUV444), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; ret = set_connector_edid(test, conn, test_edid_hdmi_1080p_rgb_max_340mhz, ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_340mhz)); - KUNIT_ASSERT_EQ(test, ret, 0); + KUNIT_ASSERT_GT(test, ret, 0); info = &conn->display_info; KUNIT_ASSERT_TRUE(test, info->is_hdmi); @@ -1593,9 +1623,9 @@ static void drm_test_check_broadcast_rgb_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -1615,9 +1645,9 @@ static void drm_test_check_bpc_8_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -1639,9 +1669,9 @@ static void drm_test_check_bpc_10_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 10); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 10); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -1663,9 +1693,9 @@ static void drm_test_check_bpc_12_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -1685,11 +1715,11 @@ static void drm_test_check_format_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB) | - BIT(HDMI_COLORSPACE_YUV422) | - BIT(HDMI_COLORSPACE_YUV444), - 8); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB) | + BIT(HDMI_COLORSPACE_YUV422) | + BIT(HDMI_COLORSPACE_YUV444), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -1707,11 +1737,11 @@ static void drm_test_check_tmds_char_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, - BIT(HDMI_COLORSPACE_RGB) | - BIT(HDMI_COLORSPACE_YUV422) | - BIT(HDMI_COLORSPACE_YUV444), - 12); + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB) | + BIT(HDMI_COLORSPACE_YUV422) | + BIT(HDMI_COLORSPACE_YUV444), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -1734,9 +1764,148 @@ static struct kunit_suite drm_atomic_helper_connector_hdmi_reset_test_suite = { .test_cases = drm_atomic_helper_connector_hdmi_reset_tests, }; +/* + * Test that the default behaviour for drm_hdmi_connector_mode_valid() is not + * to reject any modes. Pass a correct EDID and verify that preferred mode + * matches the expectations (1080p). 
+ */ +static void drm_test_check_mode_valid(struct kunit *test) +{ + struct drm_atomic_helper_connector_hdmi_priv *priv; + struct drm_connector *conn; + struct drm_display_mode *preferred; + + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); + KUNIT_ASSERT_NOT_NULL(test, priv); + + conn = &priv->connector; + preferred = find_preferred_mode(conn); + KUNIT_ASSERT_NOT_NULL(test, preferred); + + KUNIT_EXPECT_EQ(test, preferred->hdisplay, 1920); + KUNIT_EXPECT_EQ(test, preferred->vdisplay, 1080); + KUNIT_EXPECT_EQ(test, preferred->clock, 148500); +} + +/* + * Test that the drm_hdmi_connector_mode_valid() will reject modes depending on + * the .tmds_char_rate_valid() behaviour. + * Pass a correct EDID and verify that high-rate modes are filtered. + */ +static void drm_test_check_mode_valid_reject_rate(struct kunit *test) +{ + struct drm_atomic_helper_connector_hdmi_priv *priv; + struct drm_connector *conn; + struct drm_display_mode *preferred; + int ret; + + priv = drm_kunit_helper_connector_hdmi_init_funcs(test, + BIT(HDMI_COLORSPACE_RGB), + 8, + &reject_100_MHz_connector_hdmi_funcs); + KUNIT_ASSERT_NOT_NULL(test, priv); + + conn = &priv->connector; + + ret = set_connector_edid(test, conn, + test_edid_hdmi_1080p_rgb_max_200mhz, + ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); + KUNIT_ASSERT_GT(test, ret, 0); + + /* + * Unlike the drm_test_check_mode_valid() here 1080p is rejected, but + * 480p is allowed. + */ + preferred = find_preferred_mode(conn); + KUNIT_ASSERT_NOT_NULL(test, preferred); + KUNIT_EXPECT_EQ(test, preferred->hdisplay, 640); + KUNIT_EXPECT_EQ(test, preferred->vdisplay, 480); + KUNIT_EXPECT_EQ(test, preferred->clock, 25200); +} + +/* + * Test that the drm_hdmi_connector_mode_valid() will not mark any modes as + * valid if .tmds_char_rate_valid() rejects all of them. Pass a correct EDID + * and verify that there is no preferred mode and no modes were set for the + * connector. + */ +static void drm_test_check_mode_valid_reject(struct kunit *test) +{ + struct drm_atomic_helper_connector_hdmi_priv *priv; + struct drm_connector *conn; + struct drm_display_mode *preferred; + int ret; + + priv = drm_kunit_helper_connector_hdmi_init_funcs(test, + BIT(HDMI_COLORSPACE_RGB), + 8, + &reject_connector_hdmi_funcs); + KUNIT_ASSERT_NOT_NULL(test, priv); + + conn = &priv->connector; + + /* should reject all modes */ + ret = set_connector_edid(test, conn, + test_edid_hdmi_1080p_rgb_max_200mhz, + ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_200mhz)); + KUNIT_ASSERT_EQ(test, ret, 0); + + preferred = find_preferred_mode(conn); + KUNIT_ASSERT_NULL(test, preferred); +} + +/* + * Test that the drm_hdmi_connector_mode_valid() will reject modes that don't + * pass the info.max_tmds_clock filter. Pass crafted EDID and verify that + * high-rate modes are filtered. 
+ */ +static void drm_test_check_mode_valid_reject_max_clock(struct kunit *test) +{ + struct drm_atomic_helper_connector_hdmi_priv *priv; + struct drm_connector *conn; + struct drm_display_mode *preferred; + int ret; + + priv = drm_kunit_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); + KUNIT_ASSERT_NOT_NULL(test, priv); + + conn = &priv->connector; + + ret = set_connector_edid(test, conn, + test_edid_hdmi_1080p_rgb_max_100mhz, + ARRAY_SIZE(test_edid_hdmi_1080p_rgb_max_100mhz)); + KUNIT_ASSERT_GT(test, ret, 0); + + KUNIT_ASSERT_EQ(test, conn->display_info.max_tmds_clock, 100 * 1000); + + preferred = find_preferred_mode(conn); + KUNIT_ASSERT_NOT_NULL(test, preferred); + KUNIT_EXPECT_EQ(test, preferred->hdisplay, 640); + KUNIT_EXPECT_EQ(test, preferred->vdisplay, 480); + KUNIT_EXPECT_EQ(test, preferred->clock, 25200); +} + +static struct kunit_case drm_atomic_helper_connector_hdmi_mode_valid_tests[] = { + KUNIT_CASE(drm_test_check_mode_valid), + KUNIT_CASE(drm_test_check_mode_valid_reject), + KUNIT_CASE(drm_test_check_mode_valid_reject_rate), + KUNIT_CASE(drm_test_check_mode_valid_reject_max_clock), + { } +}; + +static struct kunit_suite drm_atomic_helper_connector_hdmi_mode_valid_test_suite = { + .name = "drm_atomic_helper_connector_hdmi_mode_valid", + .test_cases = drm_atomic_helper_connector_hdmi_mode_valid_tests, +}; + kunit_test_suites( &drm_atomic_helper_connector_hdmi_check_test_suite, &drm_atomic_helper_connector_hdmi_reset_test_suite, + &drm_atomic_helper_connector_hdmi_mode_valid_test_suite, ); MODULE_AUTHOR("Maxime Ripard <mripard@kernel.org>"); diff --git a/drivers/gpu/drm/tests/drm_kunit_edid.h b/drivers/gpu/drm/tests/drm_kunit_edid.h index 107559900e97..6358397a5d7a 100644 --- a/drivers/gpu/drm/tests/drm_kunit_edid.h +++ b/drivers/gpu/drm/tests/drm_kunit_edid.h @@ -74,6 +74,108 @@ static const unsigned char test_edid_dvi_1080p[] = { * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 92 * * 02 03 1b 81 e3 05 00 20 41 10 e2 00 4a 6d 03 0c + * 00 12 34 00 14 20 00 00 00 00 00 00 00 00 00 00 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 e4 + * + * ---------------- + * + * Block 0, Base EDID: + * EDID Structure Version & Revision: 1.3 + * Vendor & Product Identification: + * Manufacturer: LNX + * Model: 42 + * Made in: 2023 + * Basic Display Parameters & Features: + * Digital display + * DFP 1.x compatible TMDS + * Maximum image size: 160 cm x 90 cm + * Gamma: 2.20 + * Monochrome or grayscale display + * First detailed timing is the preferred timing + * Color Characteristics: + * Red : 0.0000, 0.0000 + * Green: 0.0000, 0.0000 + * Blue : 0.0000, 0.0000 + * White: 0.0000, 0.0000 + * Established Timings I & II: + * DMT 0x04: 640x480 59.940476 Hz 4:3 31.469 kHz 25.175000 MHz + * Standard Timings: none + * Detailed Timing Descriptors: + * DTD 1: 1920x1080 60.000000 Hz 16:9 67.500 kHz 148.500000 MHz (1600 mm x 900 mm) + * Hfront 88 Hsync 44 Hback 148 Hpol P + * Vfront 4 Vsync 5 Vback 36 Vpol P + * Display Product Name: 'Test EDID' + * Display Range Limits: + * Monitor ranges (GTF): 50-70 Hz V, 30-70 kHz H, max dotclock 150 MHz + * Dummy Descriptor: + * Extension blocks: 1 + * Checksum: 0x92 + * + * ---------------- + * + * Block 1, CTA-861 Extension Block: + * Revision: 3 + * Underscans IT Video Formats by default + 
* Native detailed modes: 1 + * Colorimetry Data Block: + * sRGB + * Video Data Block: + * VIC 16: 1920x1080 60.000000 Hz 16:9 67.500 kHz 148.500000 MHz + * Video Capability Data Block: + * YCbCr quantization: No Data + * RGB quantization: Selectable (via AVI Q) + * PT scan behavior: No Data + * IT scan behavior: Always Underscanned + * CE scan behavior: Always Underscanned + * Vendor-Specific Data Block (HDMI), OUI 00-0C-03: + * Source physical address: 1.2.3.4 + * Maximum TMDS clock: 100 MHz + * Extended HDMI video details: + * Checksum: 0xe4 Unused space in Extension Block: 100 bytes + */ +static const unsigned char test_edid_hdmi_1080p_rgb_max_100mhz[] = { + 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x31, 0xd8, 0x2a, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x01, 0x03, 0x81, 0xa0, 0x5a, 0x78, + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, + 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x3a, 0x80, 0x18, 0x71, 0x38, + 0x2d, 0x40, 0x58, 0x2c, 0x45, 0x00, 0x40, 0x84, 0x63, 0x00, 0x00, 0x1e, + 0x00, 0x00, 0x00, 0xfc, 0x00, 0x54, 0x65, 0x73, 0x74, 0x20, 0x45, 0x44, + 0x49, 0x44, 0x0a, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x32, + 0x46, 0x00, 0x00, 0xc4, 0x00, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x41, 0x02, 0x03, 0x1b, 0x81, + 0xe3, 0x05, 0x00, 0x20, 0x41, 0x10, 0xe2, 0x00, 0x4a, 0x6d, 0x03, 0x0c, + 0x00, 0x12, 0x34, 0x00, 0x14, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xe4 +}; + +/* + * edid-decode (hex): + * + * 00 ff ff ff ff ff ff 00 31 d8 2a 00 00 00 00 00 + * 00 21 01 03 81 a0 5a 78 02 00 00 00 00 00 00 00 + * 00 00 00 20 00 00 01 01 01 01 01 01 01 01 01 01 + * 01 01 01 01 01 01 02 3a 80 18 71 38 2d 40 58 2c + * 45 00 40 84 63 00 00 1e 00 00 00 fc 00 54 65 73 + * 74 20 45 44 49 44 0a 20 20 20 00 00 00 fd 00 32 + * 46 1e 46 0f 00 0a 20 20 20 20 20 20 00 00 00 10 + * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 92 + * + * 02 03 1b 81 e3 05 00 20 41 10 e2 00 4a 6d 03 0c * 00 12 34 00 28 20 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 diff --git a/drivers/gpu/drm/tidss/tidss_dispc.c b/drivers/gpu/drm/tidss/tidss_dispc.c index 1ad711f8d2a8..cacb5f3d8085 100644 --- a/drivers/gpu/drm/tidss/tidss_dispc.c +++ b/drivers/gpu/drm/tidss/tidss_dispc.c @@ -700,7 +700,7 @@ void dispc_k2g_set_irqenable(struct dispc_device *dispc, dispc_irq_t mask) { dispc_irq_t old_mask = dispc_k2g_read_irqenable(dispc); - /* clear the irqstatus for newly enabled irqs */ + /* clear the irqstatus for irqs that will be enabled */ dispc_k2g_clear_irqstatus(dispc, (mask ^ old_mask) & mask); dispc_k2g_vp_set_irqenable(dispc, 0, mask); @@ -708,6 +708,9 @@ void dispc_k2g_set_irqenable(struct dispc_device 
*dispc, dispc_irq_t mask) dispc_write(dispc, DISPC_IRQENABLE_SET, (1 << 0) | (1 << 7)); + /* clear the irqstatus for irqs that were disabled */ + dispc_k2g_clear_irqstatus(dispc, (mask ^ old_mask) & old_mask); + /* flush posted write */ dispc_k2g_read_irqenable(dispc); } @@ -780,24 +783,18 @@ static void dispc_k3_clear_irqstatus(struct dispc_device *dispc, dispc_irq_t clearmask) { unsigned int i; - u32 top_clear = 0; for (i = 0; i < dispc->feat->num_vps; ++i) { - if (clearmask & DSS_IRQ_VP_MASK(i)) { + if (clearmask & DSS_IRQ_VP_MASK(i)) dispc_k3_vp_write_irqstatus(dispc, i, clearmask); - top_clear |= BIT(i); - } } for (i = 0; i < dispc->feat->num_planes; ++i) { - if (clearmask & DSS_IRQ_PLANE_MASK(i)) { + if (clearmask & DSS_IRQ_PLANE_MASK(i)) dispc_k3_vid_write_irqstatus(dispc, i, clearmask); - top_clear |= BIT(4 + i); - } } - if (dispc->feat->subrev == DISPC_K2G) - return; - dispc_write(dispc, DISPC_IRQSTATUS, top_clear); + /* always clear the top level irqstatus */ + dispc_write(dispc, DISPC_IRQSTATUS, dispc_read(dispc, DISPC_IRQSTATUS)); /* Flush posted writes */ dispc_read(dispc, DISPC_IRQSTATUS); @@ -843,7 +840,7 @@ static void dispc_k3_set_irqenable(struct dispc_device *dispc, old_mask = dispc_k3_read_irqenable(dispc); - /* clear the irqstatus for newly enabled irqs */ + /* clear the irqstatus for irqs that will be enabled */ dispc_k3_clear_irqstatus(dispc, (old_mask ^ mask) & mask); for (i = 0; i < dispc->feat->num_vps; ++i) { @@ -868,6 +865,9 @@ static void dispc_k3_set_irqenable(struct dispc_device *dispc, if (main_disable) dispc_write(dispc, DISPC_IRQENABLE_CLR, main_disable); + /* clear the irqstatus for irqs that were disabled */ + dispc_k3_clear_irqstatus(dispc, (old_mask ^ mask) & old_mask); + /* Flush posted writes */ dispc_read(dispc, DISPC_IRQENABLE_SET); } @@ -2767,8 +2767,12 @@ static void dispc_init_errata(struct dispc_device *dispc) */ static void dispc_softreset_k2g(struct dispc_device *dispc) { + unsigned long flags; + + spin_lock_irqsave(&dispc->tidss->irq_lock, flags); dispc_set_irqenable(dispc, 0); dispc_read_and_clear_irqstatus(dispc); + spin_unlock_irqrestore(&dispc->tidss->irq_lock, flags); for (unsigned int vp_idx = 0; vp_idx < dispc->feat->num_vps; ++vp_idx) VP_REG_FLD_MOD(dispc, vp_idx, DISPC_VP_CONTROL, 0, 0, 0); diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c index 7c8fd6407d82..d4652e8cc28c 100644 --- a/drivers/gpu/drm/tidss/tidss_drv.c +++ b/drivers/gpu/drm/tidss/tidss_drv.c @@ -9,9 +9,9 @@ #include <linux/module.h> #include <linux/pm_runtime.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_crtc.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> @@ -113,7 +113,6 @@ static const struct drm_driver tidss_driver = { DRM_FBDEV_DMA_DRIVER_OPS, .name = "tidss", .desc = "TI Keystone DSS", - .date = "20180215", .major = 1, .minor = 0, }; @@ -140,7 +139,7 @@ static int tidss_probe(struct platform_device *pdev) platform_set_drvdata(pdev, tidss); - spin_lock_init(&tidss->wait_lock); + spin_lock_init(&tidss->irq_lock); ret = dispc_init(tidss); if (ret) { diff --git a/drivers/gpu/drm/tidss/tidss_drv.h b/drivers/gpu/drm/tidss/tidss_drv.h index d7f27b0b0315..7f4f4282bc04 100644 --- a/drivers/gpu/drm/tidss/tidss_drv.h +++ b/drivers/gpu/drm/tidss/tidss_drv.h @@ -29,8 +29,9 @@ struct tidss_device { unsigned int irq; - spinlock_t wait_lock; /* protects the irq masks */ - dispc_irq_t irq_mask; /* enabled irqs in 
addition to wait_list */ + /* protects the irq masks field and irqenable/irqstatus registers */ + spinlock_t irq_lock; + dispc_irq_t irq_mask; /* enabled irqs */ }; #define to_tidss(__dev) container_of(__dev, struct tidss_device, ddev) diff --git a/drivers/gpu/drm/tidss/tidss_irq.c b/drivers/gpu/drm/tidss/tidss_irq.c index 604334ef526a..5abc788781f4 100644 --- a/drivers/gpu/drm/tidss/tidss_irq.c +++ b/drivers/gpu/drm/tidss/tidss_irq.c @@ -15,10 +15,9 @@ #include "tidss_irq.h" #include "tidss_plane.h" -/* call with wait_lock and dispc runtime held */ static void tidss_irq_update(struct tidss_device *tidss) { - assert_spin_locked(&tidss->wait_lock); + assert_spin_locked(&tidss->irq_lock); dispc_set_irqenable(tidss->dispc, tidss->irq_mask); } @@ -31,11 +30,11 @@ void tidss_irq_enable_vblank(struct drm_crtc *crtc) u32 hw_videoport = tcrtc->hw_videoport; unsigned long flags; - spin_lock_irqsave(&tidss->wait_lock, flags); + spin_lock_irqsave(&tidss->irq_lock, flags); tidss->irq_mask |= DSS_IRQ_VP_VSYNC_EVEN(hw_videoport) | DSS_IRQ_VP_VSYNC_ODD(hw_videoport); tidss_irq_update(tidss); - spin_unlock_irqrestore(&tidss->wait_lock, flags); + spin_unlock_irqrestore(&tidss->irq_lock, flags); } void tidss_irq_disable_vblank(struct drm_crtc *crtc) @@ -46,11 +45,11 @@ void tidss_irq_disable_vblank(struct drm_crtc *crtc) u32 hw_videoport = tcrtc->hw_videoport; unsigned long flags; - spin_lock_irqsave(&tidss->wait_lock, flags); + spin_lock_irqsave(&tidss->irq_lock, flags); tidss->irq_mask &= ~(DSS_IRQ_VP_VSYNC_EVEN(hw_videoport) | DSS_IRQ_VP_VSYNC_ODD(hw_videoport)); tidss_irq_update(tidss); - spin_unlock_irqrestore(&tidss->wait_lock, flags); + spin_unlock_irqrestore(&tidss->irq_lock, flags); } static irqreturn_t tidss_irq_handler(int irq, void *arg) @@ -60,7 +59,9 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg) unsigned int id; dispc_irq_t irqstatus; + spin_lock(&tidss->irq_lock); irqstatus = dispc_read_and_clear_irqstatus(tidss->dispc); + spin_unlock(&tidss->irq_lock); for (id = 0; id < tidss->num_crtcs; id++) { struct drm_crtc *crtc = tidss->crtcs[id]; @@ -78,8 +79,13 @@ static irqreturn_t tidss_irq_handler(int irq, void *arg) tidss_crtc_error_irq(crtc, irqstatus); } - if (irqstatus & DSS_IRQ_DEVICE_OCP_ERR) - dev_err_ratelimited(tidss->dev, "OCP error\n"); + for (unsigned int i = 0; i < tidss->num_planes; ++i) { + struct drm_plane *plane = tidss->planes[i]; + struct tidss_plane *tplane = to_tidss_plane(plane); + + if (irqstatus & DSS_IRQ_PLANE_FIFO_UNDERFLOW(tplane->hw_plane_id)) + tidss_plane_error_irq(plane, irqstatus); + } return IRQ_HANDLED; } @@ -88,9 +94,9 @@ void tidss_irq_resume(struct tidss_device *tidss) { unsigned long flags; - spin_lock_irqsave(&tidss->wait_lock, flags); + spin_lock_irqsave(&tidss->irq_lock, flags); tidss_irq_update(tidss); - spin_unlock_irqrestore(&tidss->wait_lock, flags); + spin_unlock_irqrestore(&tidss->irq_lock, flags); } int tidss_irq_install(struct drm_device *ddev, unsigned int irq) @@ -105,7 +111,7 @@ int tidss_irq_install(struct drm_device *ddev, unsigned int irq) if (ret) return ret; - tidss->irq_mask = DSS_IRQ_DEVICE_OCP_ERR; + tidss->irq_mask = 0; for (unsigned int i = 0; i < tidss->num_crtcs; ++i) { struct tidss_crtc *tcrtc = to_tidss_crtc(tidss->crtcs[i]); @@ -115,6 +121,12 @@ int tidss_irq_install(struct drm_device *ddev, unsigned int irq) tidss->irq_mask |= DSS_IRQ_VP_FRAME_DONE(tcrtc->hw_videoport); } + for (unsigned int i = 0; i < tidss->num_planes; ++i) { + struct tidss_plane *tplane = to_tidss_plane(tidss->planes[i]); + + tidss->irq_mask |= 
DSS_IRQ_PLANE_FIFO_UNDERFLOW(tplane->hw_plane_id); + } + return 0; } diff --git a/drivers/gpu/drm/tidss/tidss_irq.h b/drivers/gpu/drm/tidss/tidss_irq.h index b512614d5863..dd61f645f662 100644 --- a/drivers/gpu/drm/tidss/tidss_irq.h +++ b/drivers/gpu/drm/tidss/tidss_irq.h @@ -19,15 +19,13 @@ * bit use |D |fou|FEOL|FEOL|FEOL|FEOL| UUUU | | * bit number|0 |1-3|4-7 |8-11| 12-19 | 20-23 | 24-31 | * - * device bits: D = OCP error + * device bits: D = Unused * WB bits: f = frame done wb, o = wb buffer overflow, * u = wb buffer uncomplete * vp bits: F = frame done, E = vsync even, O = vsync odd, L = sync lost * plane bits: U = fifo underflow */ -#define DSS_IRQ_DEVICE_OCP_ERR BIT(0) - #define DSS_IRQ_DEVICE_FRAMEDONEWB BIT(1) #define DSS_IRQ_DEVICE_WBBUFFEROVERFLOW BIT(2) #define DSS_IRQ_DEVICE_WBUNCOMPLETEERROR BIT(3) diff --git a/drivers/gpu/drm/tidss/tidss_plane.c b/drivers/gpu/drm/tidss/tidss_plane.c index a5d86822c9e3..116de124bddb 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.c +++ b/drivers/gpu/drm/tidss/tidss_plane.c @@ -18,6 +18,14 @@ #include "tidss_drv.h" #include "tidss_plane.h" +void tidss_plane_error_irq(struct drm_plane *plane, u64 irqstatus) +{ + struct tidss_plane *tplane = to_tidss_plane(plane); + + dev_err_ratelimited(plane->dev->dev, "Plane%u underflow (irq %llx)\n", + tplane->hw_plane_id, irqstatus); +} + /* drm_plane_helper_funcs */ static int tidss_plane_atomic_check(struct drm_plane *plane, diff --git a/drivers/gpu/drm/tidss/tidss_plane.h b/drivers/gpu/drm/tidss/tidss_plane.h index e933e158b617..aecaf2728406 100644 --- a/drivers/gpu/drm/tidss/tidss_plane.h +++ b/drivers/gpu/drm/tidss/tidss_plane.h @@ -22,4 +22,6 @@ struct tidss_plane *tidss_plane_create(struct tidss_device *tidss, u32 crtc_mask, const u32 *formats, u32 num_formats); +void tidss_plane_error_irq(struct drm_plane *plane, u64 irqstatus); + #endif diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c index 6f0df8d6b90c..7caec4d38ddf 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c @@ -13,8 +13,8 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_debugfs.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> @@ -481,7 +481,6 @@ static const struct drm_driver tilcdc_driver = { .fops = &fops, .name = "tilcdc", .desc = "TI LCD Controller DRM", - .date = "20121205", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/Makefile b/drivers/gpu/drm/tiny/Makefile index 4aaf56f8707d..60816d2eb4ff 100644 --- a/drivers/gpu/drm/tiny/Makefile +++ b/drivers/gpu/drm/tiny/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_DRM_ARCPGU) += arcpgu.o obj-$(CONFIG_DRM_BOCHS) += bochs.o -obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus.o +obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus-qemu.o obj-$(CONFIG_DRM_GM12U320) += gm12u320.o obj-$(CONFIG_DRM_OFDRM) += ofdrm.o obj-$(CONFIG_DRM_PANEL_MIPI_DBI) += panel-mipi-dbi.o diff --git a/drivers/gpu/drm/tiny/arcpgu.c b/drivers/gpu/drm/tiny/arcpgu.c index 0cc68042a6d6..2748d1f21d86 100644 --- a/drivers/gpu/drm/tiny/arcpgu.c +++ b/drivers/gpu/drm/tiny/arcpgu.c @@ -6,8 +6,9 @@ */ #include <linux/clk.h> + +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_debugfs.h> #include <drm/drm_device.h> #include <drm/drm_drv.h> @@ -289,7 +290,7 @@ static int arcpgu_load(struct arcpgu_drm_private *arcpgu) * There is only one 
output port inside each device. It is linked with * encoder endpoint. */ - endpoint_node = of_graph_get_next_endpoint(pdev->dev.of_node, NULL); + endpoint_node = of_graph_get_endpoint_by_regs(pdev->dev.of_node, 0, -1); if (endpoint_node) { encoder_node = of_graph_get_remote_port_parent(endpoint_node); of_node_put(endpoint_node); @@ -366,7 +367,6 @@ static const struct drm_driver arcpgu_drm_driver = { .driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC, .name = "arcpgu", .desc = "ARC PGU Controller", - .date = "20160219", .major = 1, .minor = 0, .patchlevel = 0, diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index 6f91ff1dbf7e..89a699370a59 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -5,9 +5,9 @@ #include <linux/module.h> #include <linux/pci.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_edid.h> @@ -680,7 +680,6 @@ static const struct drm_driver bochs_driver = { .fops = &bochs_fops, .name = "bochs-drm", .desc = "bochs dispi vga interface (qemu stdvga)", - .date = "20130925", .major = 1, .minor = 0, DRM_GEM_SHMEM_DRIVER_OPS, diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus-qemu.c index 4d2adcaeaa60..52ec1e4ea9e5 100644 --- a/drivers/gpu/drm/tiny/cirrus.c +++ b/drivers/gpu/drm/tiny/cirrus-qemu.c @@ -24,10 +24,10 @@ #include <video/cirrus.h> #include <video/vga.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic_state_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> @@ -46,9 +46,8 @@ #include <drm/drm_module.h> #include <drm/drm_probe_helper.h> -#define DRIVER_NAME "cirrus" +#define DRIVER_NAME "cirrus-qemu" #define DRIVER_DESC "qemu cirrus vga" -#define DRIVER_DATE "2019" #define DRIVER_MAJOR 2 #define DRIVER_MINOR 0 @@ -589,14 +588,14 @@ static int cirrus_pipe_init(struct cirrus_device *cirrus) encoder = &cirrus->encoder; ret = drm_encoder_init(dev, encoder, &cirrus_encoder_funcs, - DRM_MODE_ENCODER_DAC, NULL); + DRM_MODE_ENCODER_VIRTUAL, NULL); if (ret) return ret; encoder->possible_crtcs = drm_crtc_mask(crtc); connector = &cirrus->connector; ret = drm_connector_init(dev, connector, &cirrus_connector_funcs, - DRM_MODE_CONNECTOR_VGA); + DRM_MODE_CONNECTOR_VIRTUAL); if (ret) return ret; drm_connector_helper_add(connector, &cirrus_connector_helper_funcs); @@ -659,7 +658,6 @@ static const struct drm_driver cirrus_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c index 0c17ae532fb4..41e9bfb2e2ff 100644 --- a/drivers/gpu/drm/tiny/gm12u320.c +++ b/drivers/gpu/drm/tiny/gm12u320.c @@ -7,9 +7,9 @@ #include <linux/pm.h> #include <linux/usb.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_atomic_state_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> @@ -34,7 +34,6 @@ MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)"); #define DRIVER_NAME "gm12u320" #define DRIVER_DESC "Grain Media GM12U320 USB projector display" -#define DRIVER_DATE "2019" #define DRIVER_MAJOR 1 
#define DRIVER_MINOR 0 @@ -626,7 +625,6 @@ static const struct drm_driver gm12u320_drm_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, diff --git a/drivers/gpu/drm/tiny/hx8357d.c b/drivers/gpu/drm/tiny/hx8357d.c index 6b0d1846cfcf..df263818f45f 100644 --- a/drivers/gpu/drm/tiny/hx8357d.c +++ b/drivers/gpu/drm/tiny/hx8357d.c @@ -16,8 +16,8 @@ #include <linux/property.h> #include <linux/spi/spi.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_atomic_helper.h> @@ -199,7 +199,6 @@ static const struct drm_driver hx8357d_driver = { .debugfs_init = mipi_dbi_debugfs_init, .name = "hx8357d", .desc = "HX8357D", - .date = "20181023", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/ili9163.c b/drivers/gpu/drm/tiny/ili9163.c index 5eb39ca1a855..62cadf5e033d 100644 --- a/drivers/gpu/drm/tiny/ili9163.c +++ b/drivers/gpu/drm/tiny/ili9163.c @@ -7,8 +7,8 @@ #include <linux/property.h> #include <linux/spi/spi.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_atomic_helper.h> @@ -118,7 +118,6 @@ static struct drm_driver ili9163_driver = { .debugfs_init = mipi_dbi_debugfs_init, .name = "ili9163", .desc = "Ilitek ILI9163", - .date = "20210208", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c index 875e2d09729a..6de44ff69b51 100644 --- a/drivers/gpu/drm/tiny/ili9225.c +++ b/drivers/gpu/drm/tiny/ili9225.c @@ -16,8 +16,8 @@ #include <linux/spi/spi.h> #include <video/mipi_display.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fb_dma_helper.h> @@ -364,7 +364,6 @@ static const struct drm_driver ili9225_driver = { DRM_FBDEV_DMA_DRIVER_OPS, .name = "ili9225", .desc = "Ilitek ILI9225", - .date = "20171106", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/ili9341.c b/drivers/gpu/drm/tiny/ili9341.c index c1dfdfbbd30c..e55029433509 100644 --- a/drivers/gpu/drm/tiny/ili9341.c +++ b/drivers/gpu/drm/tiny/ili9341.c @@ -15,8 +15,8 @@ #include <linux/property.h> #include <linux/spi/spi.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_atomic_helper.h> @@ -155,7 +155,6 @@ static const struct drm_driver ili9341_driver = { .debugfs_init = mipi_dbi_debugfs_init, .name = "ili9341", .desc = "Ilitek ILI9341", - .date = "20180514", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/ili9486.c b/drivers/gpu/drm/tiny/ili9486.c index 7e46a720d5e2..093661c771a0 100644 --- a/drivers/gpu/drm/tiny/ili9486.c +++ b/drivers/gpu/drm/tiny/ili9486.c @@ -14,8 +14,8 @@ #include <video/mipi_display.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_atomic_helper.h> @@ -177,7 +177,6 @@ static const struct drm_driver ili9486_driver = { .debugfs_init = mipi_dbi_debugfs_init, .name = "ili9486", .desc = "Ilitek ILI9486", - .date = "20200118", .major = 1, .minor = 0, }; diff --git 
a/drivers/gpu/drm/tiny/mi0283qt.c b/drivers/gpu/drm/tiny/mi0283qt.c index f1461c55dba6..b6b4664908ae 100644 --- a/drivers/gpu/drm/tiny/mi0283qt.c +++ b/drivers/gpu/drm/tiny/mi0283qt.c @@ -13,8 +13,8 @@ #include <linux/regulator/consumer.h> #include <linux/spi/spi.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_atomic_helper.h> @@ -159,7 +159,6 @@ static const struct drm_driver mi0283qt_driver = { .debugfs_init = mipi_dbi_debugfs_init, .name = "mi0283qt", .desc = "Multi-Inno MI0283QT", - .date = "20160614", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c index 9898eab5e9e2..13491c0e704a 100644 --- a/drivers/gpu/drm/tiny/ofdrm.c +++ b/drivers/gpu/drm/tiny/ofdrm.c @@ -5,9 +5,9 @@ #include <linux/pci.h> #include <linux/platform_device.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_state_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_damage_helper.h> #include <drm/drm_device.h> @@ -25,7 +25,6 @@ #define DRIVER_NAME "ofdrm" #define DRIVER_DESC "DRM driver for OF platform devices" -#define DRIVER_DATE "20220501" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -1348,7 +1347,6 @@ static struct drm_driver ofdrm_driver = { DRM_FBDEV_SHMEM_DRIVER_OPS, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .driver_features = DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET, diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c index e66729b31bd6..0460ecaef4bd 100644 --- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c +++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c @@ -10,12 +10,13 @@ #include <linux/firmware.h> #include <linux/gpio/consumer.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/property.h> #include <linux/regulator/consumer.h> #include <linux/spi/spi.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_atomic_helper.h> @@ -269,7 +270,6 @@ static const struct drm_driver panel_mipi_dbi_driver = { .debugfs_init = mipi_dbi_debugfs_init, .name = "panel-mipi-dbi", .desc = "MIPI DBI compatible display panel", - .date = "20220103", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c index 77944eb17b3c..52ba6c699bc8 100644 --- a/drivers/gpu/drm/tiny/repaper.c +++ b/drivers/gpu/drm/tiny/repaper.c @@ -21,8 +21,8 @@ #include <linux/spi/spi.h> #include <linux/thermal.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> @@ -917,7 +917,6 @@ static const struct drm_driver repaper_driver = { DRM_FBDEV_DMA_DRIVER_OPS, .name = "repaper", .desc = "Pervasive Displays RePaper e-ink panels", - .date = "20170405", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/sharp-memory.c b/drivers/gpu/drm/tiny/sharp-memory.c index 2d2315bd6aef..03d2850310c4 100644 --- a/drivers/gpu/drm/tiny/sharp-memory.c +++ b/drivers/gpu/drm/tiny/sharp-memory.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +#include <drm/clients/drm_client_setup.h> #include 
<drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> @@ -107,7 +107,6 @@ static const struct drm_driver sharp_memory_drm_driver = { DRM_FBDEV_DMA_DRIVER_OPS, .name = "sharp_memory_display", .desc = "Sharp Display Memory LCD", - .date = "20231129", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c index 4d4f05dee244..5d9ab8adf800 100644 --- a/drivers/gpu/drm/tiny/simpledrm.c +++ b/drivers/gpu/drm/tiny/simpledrm.c @@ -10,9 +10,9 @@ #include <linux/pm_domain.h> #include <linux/regulator/consumer.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_state_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_damage_helper.h> @@ -31,7 +31,6 @@ #define DRIVER_NAME "simpledrm" #define DRIVER_DESC "DRM driver for simple-framebuffer platform devices" -#define DRIVER_DATE "20200625" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -1015,7 +1014,6 @@ static struct drm_driver simpledrm_driver = { DRM_FBDEV_SHMEM_DRIVER_OPS, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .driver_features = DRIVER_ATOMIC | DRIVER_GEM | DRIVER_MODESET, diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c index 97013685c62f..a29672d84ede 100644 --- a/drivers/gpu/drm/tiny/st7586.c +++ b/drivers/gpu/drm/tiny/st7586.c @@ -12,8 +12,8 @@ #include <linux/spi/spi.h> #include <video/mipi_display.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fb_dma_helper.h> @@ -295,7 +295,6 @@ static const struct drm_driver st7586_driver = { .debugfs_init = mipi_dbi_debugfs_init, .name = "st7586", .desc = "Sitronix ST7586", - .date = "20170801", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c index 0747ebd999cc..1d60f6e5b3bc 100644 --- a/drivers/gpu/drm/tiny/st7735r.c +++ b/drivers/gpu/drm/tiny/st7735r.c @@ -16,8 +16,8 @@ #include <linux/spi/spi.h> #include <video/mipi_display.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_gem_atomic_helper.h> @@ -160,7 +160,6 @@ static const struct drm_driver st7735r_driver = { .debugfs_init = mipi_dbi_debugfs_init, .name = "st7735r", .desc = "Sitronix ST7735R", - .date = "20171128", .major = 1, .minor = 0, }; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 2c699ed1963a..a194db83421d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -58,13 +58,13 @@ static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) return VM_FAULT_RETRY; - ttm_bo_get(bo); + drm_gem_object_get(&bo->base); mmap_read_unlock(vmf->vma->vm_mm); (void)dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_KERNEL, true, MAX_SCHEDULE_TIMEOUT); dma_resv_unlock(bo->base.resv); - ttm_bo_put(bo); + drm_gem_object_put(&bo->base); return VM_FAULT_RETRY; } @@ -130,12 +130,12 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, */ if (fault_flag_allow_retry_first(vmf->flags)) { if (!(vmf->flags & 
FAULT_FLAG_RETRY_NOWAIT)) { - ttm_bo_get(bo); + drm_gem_object_get(&bo->base); mmap_read_unlock(vmf->vma->vm_mm); if (!dma_resv_lock_interruptible(bo->base.resv, NULL)) dma_resv_unlock(bo->base.resv); - ttm_bo_put(bo); + drm_gem_object_put(&bo->base); } return VM_FAULT_RETRY; @@ -353,7 +353,7 @@ void ttm_bo_vm_open(struct vm_area_struct *vma) WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping); - ttm_bo_get(bo); + drm_gem_object_get(&bo->base); } EXPORT_SYMBOL(ttm_bo_vm_open); @@ -361,7 +361,7 @@ void ttm_bo_vm_close(struct vm_area_struct *vma) { struct ttm_buffer_object *bo = vma->vm_private_data; - ttm_bo_put(bo); + drm_gem_object_put(&bo->base); vma->vm_private_data = NULL; } EXPORT_SYMBOL(ttm_bo_vm_close); @@ -405,13 +405,25 @@ static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo, return len; } -int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write) +/** + * ttm_bo_access - Helper to access a buffer object + * + * @bo: ttm buffer object + * @offset: access offset into buffer object + * @buf: pointer to caller memory to read into or write from + * @len: length of access + * @write: write access + * + * Utility function to access a buffer object. Useful when buffer object cannot + * be easily mapped (non-contiguous, non-visible, etc...). Should not directly + * be exported to user space via a peek / poke interface. + * + * Returns: + * @len if successful, negative error code on failure. + */ +int ttm_bo_access(struct ttm_buffer_object *bo, unsigned long offset, + void *buf, int len, int write) { - struct ttm_buffer_object *bo = vma->vm_private_data; - unsigned long offset = (addr) - vma->vm_start + - ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) - << PAGE_SHIFT); int ret; if (len < 1 || (offset + len) > bo->base.size) @@ -429,8 +441,8 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, break; default: if (bo->bdev->funcs->access_memory) - ret = bo->bdev->funcs->access_memory( - bo, offset, buf, len, write); + ret = bo->bdev->funcs->access_memory + (bo, offset, buf, len, write); else ret = -EIO; } @@ -439,6 +451,18 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write) return ret; } +EXPORT_SYMBOL(ttm_bo_access); + +int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct ttm_buffer_object *bo = vma->vm_private_data; + unsigned long offset = (addr) - vma->vm_start + + ((vma->vm_pgoff - drm_vma_node_start(&bo->base.vma_node)) + << PAGE_SHIFT); + + return ttm_bo_access(bo, offset, buf, len, write); +} EXPORT_SYMBOL(ttm_bo_vm_access); static const struct vm_operations_struct ttm_bo_vm_ops = { @@ -462,7 +486,7 @@ int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo) if (is_cow_mapping(vma->vm_flags)) return -EINVAL; - ttm_bo_get(bo); + drm_gem_object_get(&bo->base); /* * Drivers may want to override the vm_ops field.
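A minimal usage sketch for the ttm_bo_access() helper documented in the hunk above (illustrative only, not part of the patch; the wrapper function below is hypothetical):

/* Hypothetical driver helper: read a small header out of a BO that may not
 * be CPU-mappable (e.g. non-contiguous or non-visible VRAM), using
 * ttm_bo_access() rather than a kmap. ttm_bo_access() returns the number of
 * bytes copied on success or a negative error code.
 */
static int example_read_bo_header(struct ttm_buffer_object *bo, u32 *header)
{
	int ret;

	ret = ttm_bo_access(bo, 0, header, sizeof(*header), 0 /* read */);
	if (ret < 0)
		return ret;

	return ret == sizeof(*header) ? 0 : -EIO;
}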
Otherwise we diff --git a/drivers/gpu/drm/tve200/tve200_drv.c b/drivers/gpu/drm/tve200/tve200_drv.c index c341aee37dd9..a048e37f1c2c 100644 --- a/drivers/gpu/drm/tve200/tve200_drv.c +++ b/drivers/gpu/drm/tve200/tve200_drv.c @@ -37,9 +37,9 @@ #include <linux/shmem_fs.h> #include <linux/slab.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_bridge.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_fourcc.h> @@ -146,7 +146,6 @@ static const struct drm_driver tve200_drm_driver = { .fops = &drm_fops, .name = "tve200", .desc = DRIVER_DESC, - .date = "20170703", .major = 1, .minor = 0, .patchlevel = 0, diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 8d8ae40f945c..05b3a152cc33 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -5,8 +5,8 @@ #include <linux/module.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> -#include <drm/drm_client_setup.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_file.h> #include <drm/drm_gem_shmem_helper.h> @@ -78,7 +78,6 @@ static const struct drm_driver driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 1eb716d9dad5..be00dc1d87a1 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -26,7 +26,6 @@ struct drm_mode_create_dumb; #define DRIVER_NAME "udl" #define DRIVER_DESC "DisplayLink" -#define DRIVER_DATE "20120220" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 0 diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c index 73ab7dd31b17..bb7815599435 100644 --- a/drivers/gpu/drm/v3d/v3d_bo.c +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -13,10 +13,6 @@ * Display engines requiring physically contiguous allocations should * look into Mesa's "renderonly" support (as used by the Mesa pl111 * driver) for an example of how to integrate with V3D. - * - * Long term, we should support evicting pages from the MMU when under - * memory pressure (thus the v3d_bo_get_pages() refcounting), but - * that's not a high priority since our systems tend to not have swap. 
*/ #include <linux/dma-buf.h> diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c index 19e3ee7ac897..76816f2551c1 100644 --- a/drivers/gpu/drm/v3d/v3d_debugfs.c +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -237,8 +237,8 @@ static int v3d_measure_clock(struct seq_file *m, void *unused) if (v3d->ver >= 40) { int cycle_count_reg = V3D_PCTR_CYCLE_COUNT(v3d->ver); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_SRC_0_3, - V3D_SET_FIELD(cycle_count_reg, - V3D_PCTR_S0)); + V3D_SET_FIELD_VER(cycle_count_reg, + V3D_PCTR_S0, v3d->ver)); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_CLR, 1); V3D_CORE_WRITE(core, V3D_V4_PCTR_0_EN, 1); } else { diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index bee51c942a56..930737a9347b 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -31,7 +31,6 @@ #define DRIVER_NAME "v3d" #define DRIVER_DESC "Broadcom V3D graphics" -#define DRIVER_DATE "20180419" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 @@ -224,6 +223,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_SET_GLOBAL, v3d_perfmon_set_global_ioctl, DRM_RENDER_ALLOW), }; static const struct drm_driver v3d_drm_driver = { @@ -248,7 +248,6 @@ static const struct drm_driver v3d_drm_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index de73eefff9ac..dc1cfe2e14be 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -183,6 +183,12 @@ struct v3d_dev { u32 num_allocated; u32 pages_allocated; } bo_stats; + + /* To support a performance analysis tool in user space, we require + * a single, globally configured performance monitor (perfmon) for + * all jobs. + */ + struct v3d_perfmon *global_perfmon; }; static inline struct v3d_dev * @@ -594,6 +600,8 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* v3d_sysfs.c */ int v3d_sysfs_init(struct device *dev); diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c index 0f564fd7160c..a25d25a8ae61 100644 --- a/drivers/gpu/drm/v3d/v3d_mmu.c +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -4,7 +4,7 @@ /** * DOC: Broadcom V3D MMU * - * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has + * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has * a single level of page tables for the V3D's 4GB address space to * map to AXI bus addresses, thus it could need up to 4MB of * physically contiguous memory to store the PTEs. @@ -15,14 +15,14 @@ * * To protect clients from each other, we should use the GMP to * quickly mask out (at 128kb granularity) what pages are available to - * each client. This is not yet implemented. + * each client. This is not yet implemented. */ #include "v3d_drv.h" #include "v3d_regs.h" -/* Note: All PTEs for the 1MB superpage must be filled with the - * superpage bit set. 
+/* Note: All PTEs for the 64KB bigpage or 1MB superpage must be filled + * with the bigpage/superpage bit set. */ #define V3D_PTE_SUPERPAGE BIT(31) #define V3D_PTE_BIGPAGE BIT(30) diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index 924814cab46a..3ebda2fa46fc 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -240,17 +240,18 @@ void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon) for (i = 0; i < ncounters; i++) { u32 source = i / 4; - u32 channel = V3D_SET_FIELD(perfmon->counters[i], V3D_PCTR_S0); + u32 channel = V3D_SET_FIELD_VER(perfmon->counters[i], V3D_PCTR_S0, + v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S1); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S1, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S2); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S2, v3d->ver); i++; - channel |= V3D_SET_FIELD(i < ncounters ? perfmon->counters[i] : 0, - V3D_PCTR_S3); + channel |= V3D_SET_FIELD_VER(i < ncounters ? perfmon->counters[i] : 0, + V3D_PCTR_S3, v3d->ver); V3D_CORE_WRITE(0, V3D_V4_PCTR_0_SRC_X(source), channel); } @@ -312,6 +313,9 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) if (perfmon == v3d->active_perfmon) v3d_perfmon_stop(v3d, perfmon, false); + /* If the global perfmon is being destroyed, set it to NULL */ + cmpxchg(&v3d->global_perfmon, perfmon, NULL); + v3d_perfmon_put(perfmon); return 0; @@ -383,6 +387,7 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, { struct v3d_file_priv *v3d_priv = file_priv->driver_priv; struct drm_v3d_perfmon_destroy *req = data; + struct v3d_dev *v3d = v3d_priv->v3d; struct v3d_perfmon *perfmon; mutex_lock(&v3d_priv->perfmon.lock); @@ -392,6 +397,13 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, if (!perfmon) return -EINVAL; + /* If the active perfmon is being destroyed, stop it first */ + if (perfmon == v3d->active_perfmon) + v3d_perfmon_stop(v3d, perfmon, false); + + /* If the global perfmon is being destroyed, set it to NULL */ + cmpxchg(&v3d->global_perfmon, perfmon, NULL); + v3d_perfmon_put(perfmon); return 0; @@ -451,3 +463,34 @@ int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, return 0; } + +int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_perfmon_set_global *req = data; + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_perfmon *perfmon; + + if (req->flags & ~DRM_V3D_PERFMON_CLEAR_GLOBAL) + return -EINVAL; + + perfmon = v3d_perfmon_find(v3d_priv, req->id); + if (!perfmon) + return -EINVAL; + + /* If the request is to clear the global performance monitor */ + if (req->flags & DRM_V3D_PERFMON_CLEAR_GLOBAL) { + if (!v3d->global_perfmon) + return -EINVAL; + + xchg(&v3d->global_perfmon, NULL); + + return 0; + } + + if (cmpxchg(&v3d->global_perfmon, NULL, perfmon)) + return -EBUSY; + + return 0; +} diff --git a/drivers/gpu/drm/v3d/v3d_performance_counters.h b/drivers/gpu/drm/v3d/v3d_performance_counters.h index d919a2fc9449..2bc4cce0744a 100644 --- a/drivers/gpu/drm/v3d/v3d_performance_counters.h +++ b/drivers/gpu/drm/v3d/v3d_performance_counters.h @@ -2,11 +2,12 @@ /* * Copyright (C) 2024 Raspberry Pi */ + #ifndef V3D_PERFORMANCE_COUNTERS_H #define 
V3D_PERFORMANCE_COUNTERS_H -/* Holds a description of a given performance counter. The index of performance - * counter is given by the array on v3d_performance_counter.h +/* Holds a description of a given performance counter. The index of + * performance counter is given by the array on `v3d_performance_counter.c`. */ struct v3d_perf_counter_desc { /* Category of the counter */ @@ -20,15 +21,12 @@ struct v3d_perf_counter_desc { }; struct v3d_perfmon_info { - /* - * Different revisions of V3D have different total number of + /* Different revisions of V3D have different total number of * performance counters. */ unsigned int max_counters; - /* - * Array of counters valid for the platform. - */ + /* Array of counters valid for the platform. */ const struct v3d_perf_counter_desc *counters; }; diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h index 1b1a62ad9585..6da3c69082bd 100644 --- a/drivers/gpu/drm/v3d/v3d_regs.h +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -15,6 +15,14 @@ fieldval & field##_MASK; \ }) +#define V3D_SET_FIELD_VER(value, field, ver) \ + ({ \ + typeof(ver) _ver = (ver); \ + u32 fieldval = (value) << field##_SHIFT(_ver); \ + WARN_ON((fieldval & ~field##_MASK(_ver)) != 0); \ + fieldval & field##_MASK(_ver); \ + }) + #define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ field##_SHIFT) @@ -354,18 +362,15 @@ #define V3D_V4_PCTR_0_SRC_28_31 0x0067c #define V3D_V4_PCTR_0_SRC_X(x) (V3D_V4_PCTR_0_SRC_0_3 + \ 4 * (x)) -# define V3D_PCTR_S0_MASK V3D_MASK(6, 0) -# define V3D_V7_PCTR_S0_MASK V3D_MASK(7, 0) -# define V3D_PCTR_S0_SHIFT 0 -# define V3D_PCTR_S1_MASK V3D_MASK(14, 8) -# define V3D_V7_PCTR_S1_MASK V3D_MASK(15, 8) -# define V3D_PCTR_S1_SHIFT 8 -# define V3D_PCTR_S2_MASK V3D_MASK(22, 16) -# define V3D_V7_PCTR_S2_MASK V3D_MASK(23, 16) -# define V3D_PCTR_S2_SHIFT 16 -# define V3D_PCTR_S3_MASK V3D_MASK(30, 24) -# define V3D_V7_PCTR_S3_MASK V3D_MASK(31, 24) -# define V3D_PCTR_S3_SHIFT 24 +# define V3D_PCTR_S0_MASK(ver) (((ver) >= 71) ? V3D_MASK(7, 0) : V3D_MASK(6, 0)) +# define V3D_PCTR_S0_SHIFT(ver) 0 +# define V3D_PCTR_S1_MASK(ver) (((ver) >= 71) ? V3D_MASK(15, 8) : V3D_MASK(14, 8)) +# define V3D_PCTR_S1_SHIFT(ver) 8 +# define V3D_PCTR_S2_MASK(ver) (((ver) >= 71) ? V3D_MASK(23, 16) : V3D_MASK(22, 16)) +# define V3D_PCTR_S2_SHIFT(ver) 16 +# define V3D_PCTR_S3_MASK(ver) (((ver) >= 71) ? V3D_MASK(31, 24) : V3D_MASK(30, 24)) +# define V3D_PCTR_S3_SHIFT(ver) 24 + #define V3D_PCTR_CYCLE_COUNT(ver) ((ver >= 71) ? 0 : 32) /* Output values of the counters. */ diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 99ac4995b5a1..da08ddb01d21 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -5,16 +5,16 @@ * DOC: Broadcom V3D scheduling * * The shared DRM GPU scheduler is used to coordinate submitting jobs - * to the hardware. Each DRM fd (roughly a client process) gets its - * own scheduler entity, which will process jobs in order. The GPU - * scheduler will round-robin between clients to submit the next job. + * to the hardware. Each DRM fd (roughly a client process) gets its + * own scheduler entity, which will process jobs in order. The GPU + * scheduler will schedule the clients with a FIFO scheduling algorithm. * * For simplicity, and in order to keep latency low for interactive * jobs when bulk background jobs are queued up, we submit a new job * to the HW only when it has completed the last one, instead of - * filling up the CT[01]Q FIFOs with jobs. 
Similarly, we use - * drm_sched_job_add_dependency() to manage the dependency between bin and - * render, instead of having the clients submit jobs using the HW's + * filling up the CT[01]Q FIFOs with jobs. Similarly, we use + * `drm_sched_job_add_dependency()` to manage the dependency between bin + * and render, instead of having the clients submit jobs using the HW's * semaphores to interlock between them. */ @@ -120,11 +120,19 @@ v3d_cpu_job_free(struct drm_sched_job *sched_job) static void v3d_switch_perfmon(struct v3d_dev *v3d, struct v3d_job *job) { - if (job->perfmon != v3d->active_perfmon) + struct v3d_perfmon *perfmon = v3d->global_perfmon; + + if (!perfmon) + perfmon = job->perfmon; + + if (perfmon == v3d->active_perfmon) + return; + + if (perfmon != v3d->active_perfmon) v3d_perfmon_stop(v3d, v3d->active_perfmon, true); - if (job->perfmon && v3d->active_perfmon != job->perfmon) - v3d_perfmon_start(v3d, job->perfmon); + if (perfmon && v3d->active_perfmon != perfmon) + v3d_perfmon_start(v3d, perfmon); } static void diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index d607aa9c4ec2..4ff5de46fb22 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -11,10 +11,11 @@ #include "v3d_trace.h" /* Takes the reservation lock on all the BOs being referenced, so that - * at queue submit time we can update the reservations. + * we can attach fences and update the reservations after pushing the job + * to the queue. * * We don't lock the RCL the tile alloc/state BOs, or overflow memory - * (all of which are on exec->unref_list). They're entirely private + * (all of which are on render->unref_list). They're entirely private * to v3d, so we don't attach dma-buf fences to them. */ static int @@ -55,11 +56,11 @@ fail: * @bo_count: Number of GEM handles passed in * * The command validator needs to reference BOs by their index within - * the submitted job's BO list. This does the validation of the job's + * the submitted job's BO list. This does the validation of the job's * BO list and reference counting for the lifetime of the job. * * Note that this function doesn't need to unreference the BOs on - * failure, because that will happen at v3d_exec_cleanup() time. + * failure, because that will happen at `v3d_job_free()`. 
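A short userspace sketch of the new global-perfmon flow (illustrative; it assumes the matching uapi additions referenced by this patch, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL and struct drm_v3d_perfmon_set_global with flags/id members, are present in drm/v3d_drm.h). While a global perfmon is installed, v3d_submit_cl_ioctl()/v3d_submit_csd_ioctl() reject jobs that carry their own perfmon_id with -EAGAIN:

#include <sys/ioctl.h>
#include <drm/v3d_drm.h>	/* assumed to carry the SET_GLOBAL additions */

/* Install perfmon 'id' as the single global monitor, or release it again
 * when 'clear' is set. A valid perfmon id is required in both cases. */
static int v3d_profiler_set_global(int fd, unsigned int id, int clear)
{
	struct drm_v3d_perfmon_set_global req = {
		.flags = clear ? DRM_V3D_PERFMON_CLEAR_GLOBAL : 0,
		.id = id,
	};

	return ioctl(fd, DRM_IOCTL_V3D_PERFMON_SET_GLOBAL, &req);
}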
*/ static int v3d_lookup_bos(struct drm_device *dev, @@ -981,6 +982,11 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data, goto fail; if (args->perfmon_id) { + if (v3d->global_perfmon) { + ret = -EAGAIN; + goto fail_perfmon; + } + render->base.perfmon = v3d_perfmon_find(v3d_priv, args->perfmon_id); @@ -1196,6 +1202,11 @@ v3d_submit_csd_ioctl(struct drm_device *dev, void *data, goto fail; if (args->perfmon_id) { + if (v3d->global_perfmon) { + ret = -EAGAIN; + goto fail_perfmon; + } + job->base.perfmon = v3d_perfmon_find(v3d_priv, args->perfmon_id); if (!job->base.perfmon) { diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c index a536c467e2b2..bb861f0a0a31 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.c +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c @@ -13,8 +13,8 @@ #include <linux/pci.h> #include <linux/vt_kern.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_file.h> @@ -189,7 +189,6 @@ static const struct drm_driver driver = { .fops = &vbox_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.h b/drivers/gpu/drm/vboxvideo/vbox_drv.h index e77bd6512eb1..dfa935f381a6 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.h +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.h @@ -25,7 +25,6 @@ #define DRIVER_NAME "vboxvideo" #define DRIVER_DESC "Oracle VM VirtualBox Graphics Card" -#define DRIVER_DATE "20130823" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 diff --git a/drivers/gpu/drm/vc4/tests/vc4_mock.c b/drivers/gpu/drm/vc4/tests/vc4_mock.c index 6527fb1db71e..e276a957b01c 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_mock.c +++ b/drivers/gpu/drm/vc4/tests/vc4_mock.c @@ -51,8 +51,8 @@ struct vc4_mock_desc { static const struct vc4_mock_desc vc4_mock = VC4_MOCK_DESC( - VC4_MOCK_CRTC_DESC(&vc4_txp_crtc_data, - VC4_MOCK_OUTPUT_DESC(VC4_ENCODER_TYPE_TXP, + VC4_MOCK_CRTC_DESC(&bcm2835_txp_data.base, + VC4_MOCK_OUTPUT_DESC(VC4_ENCODER_TYPE_TXP0, DRM_MODE_ENCODER_VIRTUAL, DRM_MODE_CONNECTOR_WRITEBACK)), VC4_MOCK_PIXELVALVE_DESC(&bcm2835_pv0_data, @@ -77,8 +77,8 @@ static const struct vc4_mock_desc vc4_mock = static const struct vc4_mock_desc vc5_mock = VC4_MOCK_DESC( - VC4_MOCK_CRTC_DESC(&vc4_txp_crtc_data, - VC4_MOCK_OUTPUT_DESC(VC4_ENCODER_TYPE_TXP, + VC4_MOCK_CRTC_DESC(&bcm2835_txp_data.base, + VC4_MOCK_OUTPUT_DESC(VC4_ENCODER_TYPE_TXP0, DRM_MODE_ENCODER_VIRTUAL, DRM_MODE_CONNECTOR_WRITEBACK)), VC4_MOCK_PIXELVALVE_DESC(&bcm2711_pv0_data, diff --git a/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c b/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c index 61622e951031..40a05869a50e 100644 --- a/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c +++ b/drivers/gpu/drm/vc4/tests/vc4_test_pv_muxing.c @@ -90,7 +90,7 @@ static const struct encoder_constraint vc4_encoder_constraints[] = { ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_DSI0, 0), ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_HDMI0, 1), ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_VEC, 1), - ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_TXP, 2), + ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_TXP0, 2), ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_DSI1, 2), }; @@ -98,7 +98,7 @@ static const struct encoder_constraint vc5_encoder_constraints[] = { ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_DPI, 0), ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_DSI0, 0), ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_VEC, 1), - 
ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_TXP, 0, 2), + ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_TXP0, 0, 2), ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_DSI1, 0, 1, 2), ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_HDMI0, 0, 1, 2), ENCODER_CONSTRAINT(VC4_ENCODER_TYPE_HDMI1, 0, 1, 2), @@ -207,7 +207,7 @@ static const struct pv_muxing_param vc4_test_pv_muxing_params[] = { VC4_PV_MUXING_TEST("1 output: DSI1", VC4_ENCODER_TYPE_DSI1), VC4_PV_MUXING_TEST("1 output: TXP", - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("2 outputs: DSI0, HDMI0", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_HDMI0), @@ -219,7 +219,7 @@ static const struct pv_muxing_param vc4_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_DSI1), VC4_PV_MUXING_TEST("2 outputs: DSI0, TXP", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("2 outputs: DPI, HDMI0", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_HDMI0), @@ -231,19 +231,19 @@ static const struct pv_muxing_param vc4_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_DSI1), VC4_PV_MUXING_TEST("2 outputs: DPI, TXP", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("2 outputs: HDMI0, DSI1", VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_DSI1), VC4_PV_MUXING_TEST("2 outputs: HDMI0, TXP", VC4_ENCODER_TYPE_HDMI0, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("2 outputs: VEC, DSI1", VC4_ENCODER_TYPE_VEC, VC4_ENCODER_TYPE_DSI1), VC4_PV_MUXING_TEST("2 outputs: VEC, TXP", VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("3 outputs: DSI0, HDMI0, DSI1", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_HDMI0, @@ -251,7 +251,7 @@ static const struct pv_muxing_param vc4_test_pv_muxing_params[] = { VC4_PV_MUXING_TEST("3 outputs: DSI0, HDMI0, TXP", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_HDMI0, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("3 outputs: DSI0, VEC, DSI1", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, @@ -259,7 +259,7 @@ static const struct pv_muxing_param vc4_test_pv_muxing_params[] = { VC4_PV_MUXING_TEST("3 outputs: DSI0, VEC, TXP", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("3 outputs: DPI, HDMI0, DSI1", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_HDMI0, @@ -267,7 +267,7 @@ static const struct pv_muxing_param vc4_test_pv_muxing_params[] = { VC4_PV_MUXING_TEST("3 outputs: DPI, HDMI0, TXP", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_HDMI0, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("3 outputs: DPI, VEC, DSI1", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, @@ -275,7 +275,7 @@ static const struct pv_muxing_param vc4_test_pv_muxing_params[] = { VC4_PV_MUXING_TEST("3 outputs: DPI, VEC, TXP", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), }; KUNIT_ARRAY_PARAM(vc4_test_pv_muxing, @@ -287,7 +287,7 @@ static const struct pv_muxing_param vc4_test_pv_muxing_invalid_params[] = { VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_DSI0), VC4_PV_MUXING_TEST("TXP/DSI1 Conflict", - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1), VC4_PV_MUXING_TEST("HDMI0/VEC Conflict", VC4_ENCODER_TYPE_HDMI0, @@ -296,22 +296,22 @@ static const struct pv_muxing_param vc4_test_pv_muxing_invalid_params[] = { VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_DSI1, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, DSI1, TXP", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, 
VC4_ENCODER_TYPE_DSI1, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("More than 3 outputs: DPI, HDMI0, DSI1, TXP", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_DSI1, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC4_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, DSI1, TXP", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, VC4_ENCODER_TYPE_DSI1, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), }; KUNIT_ARRAY_PARAM(vc4_test_pv_muxing_invalid, @@ -342,7 +342,7 @@ static const struct pv_muxing_param vc5_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("2 outputs: DPI, TXP", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC5_PV_MUXING_TEST("2 outputs: DPI, VEC", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC), @@ -360,7 +360,7 @@ static const struct pv_muxing_param vc5_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("2 outputs: DSI0, TXP", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC5_PV_MUXING_TEST("2 outputs: DSI0, VEC", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC), @@ -372,7 +372,7 @@ static const struct pv_muxing_param vc5_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_VEC), VC5_PV_MUXING_TEST("2 outputs: DSI1, TXP", VC4_ENCODER_TYPE_DSI1, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC5_PV_MUXING_TEST("2 outputs: DSI1, HDMI0", VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0), @@ -384,7 +384,7 @@ static const struct pv_muxing_param vc5_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_VEC), VC5_PV_MUXING_TEST("2 outputs: HDMI0, TXP", VC4_ENCODER_TYPE_HDMI0, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC5_PV_MUXING_TEST("2 outputs: HDMI0, HDMI1", VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), @@ -393,14 +393,14 @@ static const struct pv_muxing_param vc5_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_VEC), VC5_PV_MUXING_TEST("2 outputs: HDMI1, TXP", VC4_ENCODER_TYPE_HDMI1, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC5_PV_MUXING_TEST("2 outputs: TXP, VEC", - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_VEC), VC5_PV_MUXING_TEST("3 outputs: DPI, VEC, TXP", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC5_PV_MUXING_TEST("3 outputs: DPI, VEC, DSI1", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, @@ -415,15 +415,15 @@ static const struct pv_muxing_param vc5_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("3 outputs: DPI, TXP, DSI1", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1), VC5_PV_MUXING_TEST("3 outputs: DPI, TXP, HDMI0", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI0), VC5_PV_MUXING_TEST("3 outputs: DPI, TXP, HDMI1", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("3 outputs: DPI, DSI1, HDMI0", VC4_ENCODER_TYPE_DPI, @@ -440,7 +440,7 @@ static const struct pv_muxing_param vc5_test_pv_muxing_params[] = { VC5_PV_MUXING_TEST("3 outputs: DSI0, VEC, TXP", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP), + VC4_ENCODER_TYPE_TXP0), VC5_PV_MUXING_TEST("3 outputs: DSI0, VEC, DSI1", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, @@ -455,15 +455,15 @@ static const struct pv_muxing_param vc5_test_pv_muxing_params[] = { VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("3 outputs: DSI0, TXP, DSI1", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1), 
VC5_PV_MUXING_TEST("3 outputs: DSI0, TXP, HDMI0", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI0), VC5_PV_MUXING_TEST("3 outputs: DSI0, TXP, HDMI1", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("3 outputs: DSI0, DSI1, HDMI0", VC4_ENCODER_TYPE_DSI0, @@ -490,17 +490,17 @@ static const struct pv_muxing_param vc5_test_pv_muxing_invalid_params[] = { VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, TXP, DSI1", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, TXP, HDMI0", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI0), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, TXP, HDMI1", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, DSI1, HDMI0", VC4_ENCODER_TYPE_DPI, @@ -519,17 +519,17 @@ static const struct pv_muxing_param vc5_test_pv_muxing_invalid_params[] = { VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, TXP, DSI1, HDMI0", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, TXP, DSI1, HDMI1", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, TXP, HDMI0, HDMI1", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, DSI1, HDMI0, HDMI1", @@ -540,19 +540,19 @@ static const struct pv_muxing_param vc5_test_pv_muxing_invalid_params[] = { VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, TXP, DSI1, HDMI0", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, TXP, DSI1, HDMI1", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, TXP, HDMI0, HDMI1", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, DSI1, HDMI0, HDMI1", @@ -563,24 +563,24 @@ static const struct pv_muxing_param vc5_test_pv_muxing_invalid_params[] = { VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, TXP, DSI1, HDMI0, HDMI1", VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, TXP, DSI1", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, TXP, HDMI0", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI0), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, TXP, HDMI1", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI1), 
VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, DSI1, HDMI0", VC4_ENCODER_TYPE_DSI0, @@ -599,17 +599,17 @@ static const struct pv_muxing_param vc5_test_pv_muxing_invalid_params[] = { VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, TXP, DSI1, HDMI0", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, TXP, DSI1, HDMI1", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, TXP, HDMI0, HDMI1", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, DSI1, HDMI0, HDMI1", @@ -620,19 +620,19 @@ static const struct pv_muxing_param vc5_test_pv_muxing_invalid_params[] = { VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, TXP, DSI1, HDMI0", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, TXP, DSI1, HDMI1", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, TXP, HDMI0, HDMI1", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, DSI1, HDMI0, HDMI1", @@ -643,27 +643,27 @@ static const struct pv_muxing_param vc5_test_pv_muxing_invalid_params[] = { VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, TXP, DSI1, HDMI0, HDMI1", VC4_ENCODER_TYPE_DSI0, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: VEC, TXP, DSI1, HDMI0, HDMI1", VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DPI, VEC, TXP, DSI1, HDMI0, HDMI1", VC4_ENCODER_TYPE_DPI, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), VC5_PV_MUXING_TEST("More than 3 outputs: DSI0, VEC, TXP, DSI1, HDMI0, HDMI1", VC4_ENCODER_TYPE_DSI0, VC4_ENCODER_TYPE_VEC, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_HDMI0, VC4_ENCODER_TYPE_HDMI1), diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index ee82a959d279..cf40a53ad42e 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -83,13 +83,22 @@ static unsigned int vc4_crtc_get_cob_allocation(struct vc4_dev *vc4, unsigned int channel) { struct vc4_hvs *hvs = vc4->hvs; - u32 dispbase = HVS_READ(SCALER_DISPBASEX(channel)); + u32 dispbase, top, base; + /* Top/base are supposed to be 4-pixel aligned, but the * Raspberry Pi firmware fills the low bits (which are * presumably ignored). 
*/ - u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3; - u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3; + + if (vc4->gen >= VC4_GEN_6_C) { + dispbase = HVS_READ(SCALER6_DISPX_COB(channel)); + top = VC4_GET_FIELD(dispbase, SCALER6_DISPX_COB_TOP) & ~3; + base = VC4_GET_FIELD(dispbase, SCALER6_DISPX_COB_BASE) & ~3; + } else { + dispbase = HVS_READ(SCALER_DISPBASEX(channel)); + top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3; + base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3; + } return top - base + 4; } @@ -122,7 +131,10 @@ static bool vc4_crtc_get_scanout_position(struct drm_crtc *crtc, * Read vertical scanline which is currently composed for our * pixelvalve by the HVS, and also the scaler status. */ - val = HVS_READ(SCALER_DISPSTATX(channel)); + if (vc4->gen >= VC4_GEN_6_C) + val = HVS_READ(SCALER6_DISPX_STATUS(channel)); + else + val = HVS_READ(SCALER_DISPSTATX(channel)); /* Get optional system timestamp after query. */ if (etime) @@ -131,7 +143,12 @@ static bool vc4_crtc_get_scanout_position(struct drm_crtc *crtc, /* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */ /* Vertical position of hvs composed scanline. */ - *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE); + + if (vc4->gen >= VC4_GEN_6_C) + *vpos = VC4_GET_FIELD(val, SCALER6_DISPX_STATUS_YLINE); + else + *vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE); + *hpos = 0; if (mode->flags & DRM_MODE_FLAG_INTERLACE) { @@ -223,6 +240,11 @@ static u32 vc4_get_fifo_full_level(struct vc4_crtc *vc4_crtc, u32 format) const struct vc4_crtc_data *crtc_data = vc4_crtc_to_vc4_crtc_data(vc4_crtc); const struct vc4_pv_data *pv_data = vc4_crtc_to_vc4_pv_data(vc4_crtc); struct vc4_dev *vc4 = to_vc4_dev(vc4_crtc->base.dev); + + /* + * NOTE: Could we use register 0x68 (PV_HW_CFG1) to get the FIFO + * size? + */ u32 fifo_len_bytes = pv_data->fifo_depth; /* @@ -404,6 +426,7 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode */ CRTC_WRITE(PV_V_CONTROL, PV_VCONTROL_CONTINUOUS | + (vc4->gen >= VC4_GEN_6_C ? PV_VCONTROL_ODD_TIMING : 0) | (is_dsi ? PV_VCONTROL_DSI : 0) | PV_VCONTROL_INTERLACE | (odd_field_first @@ -415,6 +438,7 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode } else { CRTC_WRITE(PV_V_CONTROL, PV_VCONTROL_CONTINUOUS | + (vc4->gen >= VC4_GEN_6_C ? PV_VCONTROL_ODD_TIMING : 0) | (is_dsi ? 
PV_VCONTROL_DSI : 0)); CRTC_WRITE(PV_VSYNCD_EVEN, 0); } @@ -429,11 +453,17 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode if (is_dsi) CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep); - if (vc4->gen == VC4_GEN_5) + if (vc4->gen >= VC4_GEN_5) CRTC_WRITE(PV_MUX_CFG, VC4_SET_FIELD(PV_MUX_CFG_RGB_PIXEL_MUX_MODE_NO_SWAP, PV_MUX_CFG_RGB_PIXEL_MUX_MODE)); + if (vc4->gen >= VC4_GEN_6_C) + CRTC_WRITE(PV_PIPE_INIT_CTRL, + VC4_SET_FIELD(1, PV_PIPE_INIT_CTRL_PV_INIT_WIDTH) | + VC4_SET_FIELD(1, PV_PIPE_INIT_CTRL_PV_INIT_IDLE) | + PV_PIPE_INIT_CTRL_PV_INIT_EN); + CRTC_WRITE(PV_CONTROL, PV_CONTROL_FIFO_CLR | vc4_crtc_get_fifo_full_level_bits(vc4_crtc, format) | VC4_SET_FIELD(format, PV_CONTROL_FORMAT) | @@ -459,8 +489,10 @@ static void require_hvs_enabled(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_hvs *hvs = vc4->hvs; - WARN_ON_ONCE((HVS_READ(SCALER_DISPCTRL) & SCALER_DISPCTRL_ENABLE) != - SCALER_DISPCTRL_ENABLE); + if (vc4->gen >= VC4_GEN_6_C) + WARN_ON_ONCE(!(HVS_READ(SCALER6_CONTROL) & SCALER6_CONTROL_HVS_EN)); + else + WARN_ON_ONCE(!(HVS_READ(SCALER_DISPCTRL) & SCALER_DISPCTRL_ENABLE)); } static int vc4_crtc_disable(struct drm_crtc *crtc, @@ -530,7 +562,11 @@ int vc4_crtc_disable_at_boot(struct drm_crtc *crtc) if (!(of_device_is_compatible(vc4_crtc->pdev->dev.of_node, "brcm,bcm2711-pixelvalve2") || of_device_is_compatible(vc4_crtc->pdev->dev.of_node, - "brcm,bcm2711-pixelvalve4"))) + "brcm,bcm2711-pixelvalve4") || + of_device_is_compatible(vc4_crtc->pdev->dev.of_node, + "brcm,bcm2712-pixelvalve0") || + of_device_is_compatible(vc4_crtc->pdev->dev.of_node, + "brcm,bcm2712-pixelvalve1"))) return 0; if (!(CRTC_READ(PV_CONTROL) & PV_CONTROL_EN)) @@ -789,14 +825,21 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) struct drm_device *dev = crtc->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_hvs *hvs = vc4->hvs; + unsigned int current_dlist; u32 chan = vc4_crtc->current_hvs_channel; unsigned long flags; spin_lock_irqsave(&dev->event_lock, flags); spin_lock(&vc4_crtc->irq_lock); + + if (vc4->gen >= VC4_GEN_6_C) + current_dlist = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_DL(chan)), + SCALER6_DISPX_DL_LACT); + else + current_dlist = HVS_READ(SCALER_DISPLACTX(chan)); + if (vc4_crtc->event && - (vc4_crtc->current_dlist == HVS_READ(SCALER_DISPLACTX(chan)) || - vc4_crtc->feeds_txp)) { + (vc4_crtc->current_dlist == current_dlist || vc4_crtc->feeds_txp)) { drm_crtc_send_vblank_event(crtc, vc4_crtc->event); vc4_crtc->event = NULL; drm_crtc_vblank_put(crtc); @@ -807,7 +850,8 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc) * the CRTC and encoder already reconfigured, leading to * underruns. This can be seen when reconfiguring the CRTC. 
*/ - vc4_hvs_unmask_underrun(hvs, chan); + if (vc4->gen < VC4_GEN_6_C) + vc4_hvs_unmask_underrun(hvs, chan); } spin_unlock(&vc4_crtc->irq_lock); spin_unlock_irqrestore(&dev->event_lock, flags); @@ -1265,6 +1309,32 @@ const struct vc4_pv_data bcm2711_pv4_data = { }, }; +const struct vc4_pv_data bcm2712_pv0_data = { + .base = { + .debugfs_name = "crtc0_regs", + .hvs_available_channels = BIT(0), + .hvs_output = 0, + }, + .fifo_depth = 64, + .pixels_per_clock = 1, + .encoder_types = { + [0] = VC4_ENCODER_TYPE_HDMI0, + }, +}; + +const struct vc4_pv_data bcm2712_pv1_data = { + .base = { + .debugfs_name = "crtc1_regs", + .hvs_available_channels = BIT(1), + .hvs_output = 1, + }, + .fifo_depth = 64, + .pixels_per_clock = 1, + .encoder_types = { + [0] = VC4_ENCODER_TYPE_HDMI1, + }, +}; + static const struct of_device_id vc4_crtc_dt_match[] = { { .compatible = "brcm,bcm2835-pixelvalve0", .data = &bcm2835_pv0_data }, { .compatible = "brcm,bcm2835-pixelvalve1", .data = &bcm2835_pv1_data }, @@ -1274,6 +1344,8 @@ static const struct of_device_id vc4_crtc_dt_match[] = { { .compatible = "brcm,bcm2711-pixelvalve2", .data = &bcm2711_pv2_data }, { .compatible = "brcm,bcm2711-pixelvalve3", .data = &bcm2711_pv3_data }, { .compatible = "brcm,bcm2711-pixelvalve4", .data = &bcm2711_pv4_data }, + { .compatible = "brcm,bcm2712-pixelvalve0", .data = &bcm2712_pv0_data }, + { .compatible = "brcm,bcm2712-pixelvalve1", .data = &bcm2712_pv1_data }, {} }; diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 2c60d37275b0..c7cb1e3a6434 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -31,8 +31,8 @@ #include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_dma.h> #include <drm/drm_fourcc.h> @@ -47,7 +47,6 @@ #define DRIVER_NAME "vc4" #define DRIVER_DESC "Broadcom VC4 graphics" -#define DRIVER_DATE "20140616" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 0 #define DRIVER_PATCHLEVEL 0 @@ -222,7 +221,6 @@ const struct drm_driver vc4_drm_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, @@ -244,7 +242,6 @@ const struct drm_driver vc5_drm_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, @@ -279,6 +276,7 @@ static void vc4_component_unbind_all(void *ptr) static const struct of_device_id vc4_dma_range_matches[] = { { .compatible = "brcm,bcm2711-hvs" }, + { .compatible = "brcm,bcm2712-hvs" }, { .compatible = "brcm,bcm2835-hvs" }, { .compatible = "brcm,bcm2835-v3d" }, { .compatible = "brcm,cygnus-v3d" }, @@ -300,16 +298,18 @@ static int vc4_drm_bind(struct device *dev) dev->coherent_dma_mask = DMA_BIT_MASK(32); - if (of_device_is_compatible(dev->of_node, "brcm,bcm2711-vc5")) - gen = VC4_GEN_5; - else - gen = VC4_GEN_4; + gen = (enum vc4_gen)of_device_get_match_data(dev); if (gen > VC4_GEN_4) driver = &vc5_drm_driver; else driver = &vc4_drm_driver; + if (gen >= VC4_GEN_6_C) + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36)); + else + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + node = of_find_matching_node_and_match(NULL, vc4_dma_range_matches, NULL); if (node) { @@ -462,9 +462,11 @@ static void vc4_platform_drm_shutdown(struct platform_device *pdev) } static const struct of_device_id 
vc4_of_match[] = { - { .compatible = "brcm,bcm2711-vc5", }, - { .compatible = "brcm,bcm2835-vc4", }, - { .compatible = "brcm,cygnus-vc4", }, + { .compatible = "brcm,bcm2711-vc5", .data = (void *)VC4_GEN_5 }, + /* NB GEN_6_C will be corrected on D0 hw to GEN_6_D via vc4_hvs_bind */ + { .compatible = "brcm,bcm2712-vc6", .data = (void *)VC4_GEN_6_C }, + { .compatible = "brcm,bcm2835-vc4", .data = (void *)VC4_GEN_4 }, + { .compatible = "brcm,cygnus-vc4", .data = (void *)VC4_GEN_4 }, {}, }; MODULE_DEVICE_TABLE(of, vc4_of_match); diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index c6be1997f1c7..4a078ffd9f82 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -84,6 +84,8 @@ struct vc4_perfmon { enum vc4_gen { VC4_GEN_4, VC4_GEN_5, + VC4_GEN_6_C, + VC4_GEN_6_D, }; struct vc4_dev { @@ -316,6 +318,21 @@ struct vc4_v3d { struct debugfs_regset32 regset; }; +#define VC4_NUM_UPM_HANDLES 32 +struct vc4_upm_refcounts { + refcount_t refcount; + + /* Allocation size */ + size_t size; + /* Our allocation in UPM for prefetching. */ + struct drm_mm_node upm; + + /* Pointer back to the HVS structure */ + struct vc4_hvs *hvs; +}; + +#define HVS_NUM_CHANNELS 3 + struct vc4_hvs { struct vc4_dev *vc4; struct platform_device *pdev; @@ -324,6 +341,7 @@ struct vc4_hvs { unsigned int dlist_mem_size; struct clk *core_clk; + struct clk *disp_clk; unsigned long max_core_rate; @@ -331,8 +349,15 @@ struct vc4_hvs { * list. Units are dwords. */ struct drm_mm dlist_mm; + /* Memory manager for the LBM memory used by HVS scaling. */ struct drm_mm lbm_mm; + + /* Memory manager for the UPM memory used for prefetching. */ + struct drm_mm upm_mm; + struct ida upm_handles; + struct vc4_upm_refcounts upm_refcounts[VC4_NUM_UPM_HANDLES + 1]; + spinlock_t mm_lock; struct drm_mm_node mitchell_netravali_filter; @@ -355,6 +380,7 @@ struct vc4_hvs { }; #define HVS_NUM_CHANNELS 3 +#define HVS_UBM_WORD_SIZE 256 struct vc4_hvs_state { struct drm_private_state base; @@ -424,6 +450,12 @@ struct vc4_plane_state { /* Our allocation in LBM for temporary storage during scaling. */ struct drm_mm_node lbm; + /* The Unified Pre-Fetcher Handle */ + unsigned int upm_handle[DRM_FORMAT_MAX_PLANES]; + + /* Number of lines to pre-fetch */ + unsigned int upm_buffer_lines; + /* Set when the plane has per-pixel alpha content or does not cover * the entire screen. This is a hint to the CRTC that it might need * to enable background color fill. 
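The UPM bookkeeping added to struct vc4_hvs above pairs an IDA of VC4_NUM_UPM_HANDLES handles with per-handle refcounts and drm_mm allocations. A rough sketch of the release pattern implied by struct vc4_upm_refcounts (the helper name and locking below are illustrative assumptions, not the driver's actual code):

/* Illustrative only: drop one reference on a UPM handle and, on the final
 * put, return the prefetch allocation to hvs->upm_mm and the handle to the
 * IDA so another plane can reuse it.
 */
static void example_upm_put(struct vc4_hvs *hvs, unsigned int handle)
{
	struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[handle];
	unsigned long flags;

	if (!refcount_dec_and_test(&refcount->refcount))
		return;

	spin_lock_irqsave(&hvs->mm_lock, flags);
	drm_mm_remove_node(&refcount->upm);
	spin_unlock_irqrestore(&hvs->mm_lock, flags);

	refcount->size = 0;
	ida_free(&hvs->upm_handles, handle);
}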
@@ -458,7 +490,8 @@ enum vc4_encoder_type { VC4_ENCODER_TYPE_DSI1, VC4_ENCODER_TYPE_SMI, VC4_ENCODER_TYPE_DPI, - VC4_ENCODER_TYPE_TXP, + VC4_ENCODER_TYPE_TXP0, + VC4_ENCODER_TYPE_TXP1, }; struct vc4_encoder { @@ -505,7 +538,16 @@ struct vc4_crtc_data { int hvs_output; }; -extern const struct vc4_crtc_data vc4_txp_crtc_data; +struct vc4_txp_data { + struct vc4_crtc_data base; + enum vc4_encoder_type encoder_type; + unsigned int high_addr_ptr_reg; + unsigned int has_byte_enable:1; + unsigned int size_minus_one:1; + unsigned int supports_40bit_addresses:1; +}; + +extern const struct vc4_txp_data bcm2835_txp_data; struct vc4_pv_data { struct vc4_crtc_data base; @@ -527,6 +569,8 @@ extern const struct vc4_pv_data bcm2711_pv1_data; extern const struct vc4_pv_data bcm2711_pv2_data; extern const struct vc4_pv_data bcm2711_pv3_data; extern const struct vc4_pv_data bcm2711_pv4_data; +extern const struct vc4_pv_data bcm2712_pv0_data; +extern const struct vc4_pv_data bcm2712_pv1_data; struct vc4_crtc { struct drm_crtc base; @@ -637,6 +681,12 @@ struct vc4_crtc_state { writel(val, hvs->regs + (offset)); \ } while (0) +#define HVS_READ6(offset) \ + HVS_READ(hvs->vc4->gen == VC4_GEN_6_C ? SCALER6_ ## offset : SCALER6D_ ## offset) + +#define HVS_WRITE6(offset, val) \ + HVS_WRITE(hvs->vc4->gen == VC4_GEN_6_C ? SCALER6_ ## offset : SCALER6D_ ## offset, val) + #define VC4_REG32(reg) { .name = #reg, .offset = reg } struct vc4_exec_info { diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index e3818c48c9b8..203293a8beca 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -580,6 +580,7 @@ static const struct drm_connector_helper_funcs vc4_hdmi_connector_helper_funcs = .detect_ctx = vc4_hdmi_connector_detect_ctx, .get_modes = vc4_hdmi_connector_get_modes, .atomic_check = vc4_hdmi_connector_atomic_check, + .mode_valid = drm_hdmi_connector_mode_valid, }; static const struct drm_connector_hdmi_funcs vc4_hdmi_hdmi_connector_funcs; @@ -845,6 +846,7 @@ static void vc4_hdmi_encoder_post_crtc_disable(struct drm_encoder *encoder, { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); struct drm_device *drm = vc4_hdmi->connector.dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); unsigned long flags; int idx; @@ -861,14 +863,25 @@ static void vc4_hdmi_encoder_post_crtc_disable(struct drm_encoder *encoder, HDMI_WRITE(HDMI_VID_CTL, HDMI_READ(HDMI_VID_CTL) | VC4_HD_VID_CTL_CLRRGB); + if (vc4->gen >= VC4_GEN_6_C) + HDMI_WRITE(HDMI_VID_CTL, HDMI_READ(HDMI_VID_CTL) | + VC4_HD_VID_CTL_BLANKPIX); + spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); mdelay(1); - spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); - HDMI_WRITE(HDMI_VID_CTL, - HDMI_READ(HDMI_VID_CTL) & ~VC4_HD_VID_CTL_ENABLE); - spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); + /* + * TODO: This should work on BCM2712, but doesn't for some + * reason and result in a system lockup. 
+ */ + if (vc4->gen < VC4_GEN_6_C) { + spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); + HDMI_WRITE(HDMI_VID_CTL, + HDMI_READ(HDMI_VID_CTL) & + ~VC4_HD_VID_CTL_ENABLE); + spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); + } vc4_hdmi_disable_scrambling(encoder); @@ -1488,7 +1501,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder, goto err_put_runtime_pm; } - vc4_hdmi_cec_update_clk_div(vc4_hdmi); if (tmds_char_rate > 297000000) @@ -1594,6 +1606,8 @@ static void vc4_hdmi_encoder_post_crtc_enable(struct drm_encoder *encoder, spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); HDMI_WRITE(HDMI_VID_CTL, + (HDMI_READ(HDMI_VID_CTL) & + ~(VC4_HD_VID_CTL_VSYNC_LOW | VC4_HD_VID_CTL_HSYNC_LOW)) | VC4_HD_VID_CTL_ENABLE | VC4_HD_VID_CTL_CLRRGB | VC4_HD_VID_CTL_UNDERFLOW_ENABLE | @@ -1752,7 +1766,6 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder, const struct drm_display_mode *mode) { struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); - unsigned long long rate; if (vc4_hdmi->variant->unsupported_odd_h_timings && !(mode->flags & DRM_MODE_FLAG_DBLCLK) && @@ -1760,8 +1773,7 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder, (mode->hsync_end % 2) || (mode->htotal % 2))) return MODE_H_ILLEGAL; - rate = drm_hdmi_compute_mode_clock(mode, 8, HDMI_COLORSPACE_RGB); - return vc4_hdmi_connector_clock_valid(&vc4_hdmi->connector, mode, rate); + return MODE_OK; } static const struct drm_encoder_helper_funcs vc4_hdmi_encoder_helper_funcs = { @@ -2110,18 +2122,33 @@ static int vc4_hdmi_audio_prepare(struct device *dev, void *data, VC4_HDMI_AUDIO_PACKET_CEA_MASK); /* Set the MAI threshold */ - if (vc4->gen >= VC4_GEN_5) + switch (vc4->gen) { + case VC4_GEN_6_D: + HDMI_WRITE(HDMI_MAI_THR, + VC4_SET_FIELD(0x10, VC6_D_HD_MAI_THR_PANICHIGH) | + VC4_SET_FIELD(0x10, VC6_D_HD_MAI_THR_PANICLOW) | + VC4_SET_FIELD(0x1c, VC6_D_HD_MAI_THR_DREQHIGH) | + VC4_SET_FIELD(0x1c, VC6_D_HD_MAI_THR_DREQLOW)); + break; + case VC4_GEN_6_C: + case VC4_GEN_5: HDMI_WRITE(HDMI_MAI_THR, VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICHIGH) | VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICLOW) | VC4_SET_FIELD(0x1c, VC4_HD_MAI_THR_DREQHIGH) | VC4_SET_FIELD(0x1c, VC4_HD_MAI_THR_DREQLOW)); - else + break; + case VC4_GEN_4: HDMI_WRITE(HDMI_MAI_THR, VC4_SET_FIELD(0x8, VC4_HD_MAI_THR_PANICHIGH) | VC4_SET_FIELD(0x8, VC4_HD_MAI_THR_PANICLOW) | VC4_SET_FIELD(0x6, VC4_HD_MAI_THR_DREQHIGH) | VC4_SET_FIELD(0x8, VC4_HD_MAI_THR_DREQLOW)); + break; + default: + drm_err(drm, "Unknown VC4 generation: %d", vc4->gen); + break; + } HDMI_WRITE(HDMI_MAI_CONFIG, VC4_HDMI_MAI_CONFIG_BIT_REVERSE | @@ -2193,9 +2220,9 @@ static int vc4_hdmi_audio_get_eld(struct device *dev, void *data, struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); struct drm_connector *connector = &vc4_hdmi->connector; - mutex_lock(&vc4_hdmi->mutex); + mutex_lock(&connector->eld_mutex); memcpy(buf, connector->eld, min(sizeof(connector->eld), len)); - mutex_unlock(&vc4_hdmi->mutex); + mutex_unlock(&connector->eld_mutex); return 0; } @@ -3121,6 +3148,7 @@ static int vc4_hdmi_runtime_suspend(struct device *dev) { struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + clk_disable_unprepare(vc4_hdmi->audio_clock); clk_disable_unprepare(vc4_hdmi->hsm_clock); return 0; @@ -3153,6 +3181,10 @@ static int vc4_hdmi_runtime_resume(struct device *dev) goto err_disable_clk; } + ret = clk_prepare_enable(vc4_hdmi->audio_clock); + if (ret) + goto err_disable_clk; + if (vc4_hdmi->variant->reset) vc4_hdmi->variant->reset(vc4_hdmi); @@ -3273,7 +3305,9 @@ static int vc4_hdmi_bind(struct device 
*dev, struct device *master, void *data) return ret; if ((of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi0") || - of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi1")) && + of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi1") || + of_device_is_compatible(dev->of_node, "brcm,bcm2712-hdmi0") || + of_device_is_compatible(dev->of_node, "brcm,bcm2712-hdmi1")) && HDMI_READ(HDMI_VID_CTL) & VC4_HD_VID_CTL_ENABLE) { clk_prepare_enable(vc4_hdmi->pixel_clock); clk_prepare_enable(vc4_hdmi->hsm_clock); @@ -3407,10 +3441,66 @@ static const struct vc4_hdmi_variant bcm2711_hdmi1_variant = { .hp_detect = vc5_hdmi_hp_detect, }; +static const struct vc4_hdmi_variant bcm2712_hdmi0_variant = { + .encoder_type = VC4_ENCODER_TYPE_HDMI0, + .debugfs_name = "hdmi0_regs", + .card_name = "vc4-hdmi-0", + .max_pixel_clock = 600000000, + .registers = vc6_hdmi_hdmi0_fields, + .num_registers = ARRAY_SIZE(vc6_hdmi_hdmi0_fields), + .phy_lane_mapping = { + PHY_LANE_0, + PHY_LANE_1, + PHY_LANE_2, + PHY_LANE_CK, + }, + .unsupported_odd_h_timings = false, + .external_irq_controller = true, + + .init_resources = vc5_hdmi_init_resources, + .csc_setup = vc5_hdmi_csc_setup, + .reset = vc5_hdmi_reset, + .set_timings = vc5_hdmi_set_timings, + .phy_init = vc6_hdmi_phy_init, + .phy_disable = vc6_hdmi_phy_disable, + .channel_map = vc5_hdmi_channel_map, + .supports_hdr = true, + .hp_detect = vc5_hdmi_hp_detect, +}; + +static const struct vc4_hdmi_variant bcm2712_hdmi1_variant = { + .encoder_type = VC4_ENCODER_TYPE_HDMI1, + .debugfs_name = "hdmi1_regs", + .card_name = "vc4-hdmi-1", + .max_pixel_clock = 600000000, + .registers = vc6_hdmi_hdmi1_fields, + .num_registers = ARRAY_SIZE(vc6_hdmi_hdmi1_fields), + .phy_lane_mapping = { + PHY_LANE_0, + PHY_LANE_1, + PHY_LANE_2, + PHY_LANE_CK, + }, + .unsupported_odd_h_timings = false, + .external_irq_controller = true, + + .init_resources = vc5_hdmi_init_resources, + .csc_setup = vc5_hdmi_csc_setup, + .reset = vc5_hdmi_reset, + .set_timings = vc5_hdmi_set_timings, + .phy_init = vc6_hdmi_phy_init, + .phy_disable = vc6_hdmi_phy_disable, + .channel_map = vc5_hdmi_channel_map, + .supports_hdr = true, + .hp_detect = vc5_hdmi_hp_detect, +}; + static const struct of_device_id vc4_hdmi_dt_match[] = { { .compatible = "brcm,bcm2835-hdmi", .data = &bcm2835_variant }, { .compatible = "brcm,bcm2711-hdmi0", .data = &bcm2711_hdmi0_variant }, { .compatible = "brcm,bcm2711-hdmi1", .data = &bcm2711_hdmi1_variant }, + { .compatible = "brcm,bcm2712-hdmi0", .data = &bcm2712_hdmi0_variant }, + { .compatible = "brcm,bcm2712-hdmi1", .data = &bcm2712_hdmi1_variant }, {} }; diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index b37f1d2c3fe5..b2424a21da23 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -237,4 +237,8 @@ void vc5_hdmi_phy_disable(struct vc4_hdmi *vc4_hdmi); void vc5_hdmi_phy_rng_enable(struct vc4_hdmi *vc4_hdmi); void vc5_hdmi_phy_rng_disable(struct vc4_hdmi *vc4_hdmi); +void vc6_hdmi_phy_init(struct vc4_hdmi *vc4_hdmi, + struct drm_connector_state *conn_state); +void vc6_hdmi_phy_disable(struct vc4_hdmi *vc4_hdmi); + #endif /* _VC4_HDMI_H_ */ diff --git a/drivers/gpu/drm/vc4/vc4_hdmi_phy.c b/drivers/gpu/drm/vc4/vc4_hdmi_phy.c index 1f5507fc7a03..56e6a35da357 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi_phy.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi_phy.c @@ -125,6 +125,48 @@ #define VC4_HDMI_RM_FORMAT_SHIFT_SHIFT 24 #define VC4_HDMI_RM_FORMAT_SHIFT_MASK VC4_MASK(25, 24) +#define VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_BG_PWRUP BIT(8) 
+#define VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_LDO_PWRUP BIT(7) +#define VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_BIAS_PWRUP BIT(6) +#define VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_RNDGEN_PWRUP BIT(4) +#define VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_TX_CK_PWRUP BIT(3) +#define VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_TX_2_PWRUP BIT(2) +#define VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_TX_1_PWRUP BIT(1) +#define VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_TX_0_PWRUP BIT(0) + +#define VC6_HDMI_TX_PHY_PLL_REFCLK_REFCLK_SEL_CMOS BIT(13) +#define VC6_HDMI_TX_PHY_PLL_REFCLK_REFFRQ_MASK VC4_MASK(9, 0) + +#define VC6_HDMI_TX_PHY_PLL_POST_KDIV_CLK0_SEL_MASK VC4_MASK(3, 2) +#define VC6_HDMI_TX_PHY_PLL_POST_KDIV_KDIV_MASK VC4_MASK(1, 0) + +#define VC6_HDMI_TX_PHY_PLL_VCOCLK_DIV_VCODIV_EN BIT(10) +#define VC6_HDMI_TX_PHY_PLL_VCOCLK_DIV_VCODIV_MASK VC4_MASK(9, 0) + +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_CTL_MASK VC4_MASK(31, 28) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_ENABLE_MASK VC4_MASK(27, 27) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_RATE_CTL_MASK VC4_MASK(26, 26) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_POST_TAP_EN_MASK VC4_MASK(25, 25) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_LDMOS_BIAS_CTL_MASK VC4_MASK(24, 23) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_COM_MODE_LDMOS_EN_MASK VC4_MASK(22, 22) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EDGE_SEL_MASK VC4_MASK(21, 21) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_HS_EN_MASK VC4_MASK(20, 20) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_TERM_CTL_MASK VC4_MASK(19, 18) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_EN_MASK VC4_MASK(17, 17) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_EN_MASK VC4_MASK(16, 16) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_CTL_MASK VC4_MASK(15, 12) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_HS_EN_MASK VC4_MASK(11, 11) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_MAIN_TAP_CURRENT_SELECT_MASK VC4_MASK(10, 8) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_POST_TAP_CURRENT_SELECT_MASK VC4_MASK(7, 5) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_LOADING_MASK VC4_MASK(4, 3) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_DRIVING_MASK VC4_MASK(2, 1) +#define VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_PRE_TAP_EN_MASK VC4_MASK(0, 0) + +#define VC6_HDMI_TX_PHY_PLL_RESET_CTL_PLL_PLLPOST_RESETB BIT(1) +#define VC6_HDMI_TX_PHY_PLL_RESET_CTL_PLL_RESETB BIT(0) + +#define VC6_HDMI_TX_PHY_PLL_POWERUP_CTL_PLL_PWRUP BIT(0) + #define OSCILLATOR_FREQUENCY 54000000 void vc4_hdmi_phy_init(struct vc4_hdmi *vc4_hdmi, @@ -558,3 +600,601 @@ void vc5_hdmi_phy_rng_disable(struct vc4_hdmi *vc4_hdmi) VC4_HDMI_TX_PHY_POWERDOWN_CTL_RNDGEN_PWRDN); spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); } + +#define VC6_VCO_MIN_FREQ (8ULL * 1000 * 1000 * 1000) +#define VC6_VCO_MAX_FREQ (12ULL * 1000 * 1000 * 1000) + +static unsigned long long +vc6_phy_get_vco_freq(unsigned long long tmds_rate, unsigned int *vco_div) +{ + unsigned int min_div; + unsigned int max_div; + unsigned int div; + + div = 0; + while (tmds_rate * div * 10 < VC6_VCO_MIN_FREQ) + div++; + min_div = div; + + while (tmds_rate * (div + 1) * 10 < VC6_VCO_MAX_FREQ) + div++; + max_div = div; + + div = min_div + (max_div - min_div) / 2; + + *vco_div = div; + return tmds_rate * div * 10; +} + +struct vc6_phy_lane_settings { + unsigned int ext_current_ctl:4; + unsigned int ffe_enable:1; + unsigned int slew_rate_ctl:1; + unsigned int ffe_post_tap_en:1; + unsigned int ldmos_bias_ctl:2; + unsigned int com_mode_ldmos_en:1; + unsigned int edge_sel:1; + unsigned int ext_current_src_hs_en:1; + unsigned int term_ctl:2; + 
unsigned int ext_current_src_en:1; + unsigned int int_current_src_en:1; + unsigned int int_current_ctl:4; + unsigned int int_current_src_hs_en:1; + unsigned int main_tap_current_select:3; + unsigned int post_tap_current_select:3; + unsigned int slew_ctl_slow_loading:2; + unsigned int slew_ctl_slow_driving:2; + unsigned int ffe_pre_tap_en:1; +}; + +struct vc6_phy_settings { + unsigned long long min_rate; + unsigned long long max_rate; + struct vc6_phy_lane_settings channel[3]; + struct vc6_phy_lane_settings clock; +}; + +static const struct vc6_phy_settings vc6_hdmi_phy_settings[] = { + { + 0, 222000000, + { + { + /* 200mA */ + .ext_current_ctl = 8, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* 200mA */ + .int_current_ctl = 8, + + /* 17.6 mA */ + .main_tap_current_select = 7, + }, + { + /* 200mA */ + .ext_current_ctl = 8, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* 200mA */ + .int_current_ctl = 8, + + /* 17.6 mA */ + .main_tap_current_select = 7, + }, + { + /* 200mA */ + .ext_current_ctl = 8, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* 200mA */ + .int_current_ctl = 8, + + /* 17.6 mA */ + .main_tap_current_select = 7, + }, + }, + { + /* 200mA */ + .ext_current_ctl = 8, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* 200mA */ + .int_current_ctl = 8, + + /* 17.6 mA */ + .main_tap_current_select = 7, + }, + }, + { + 222000001, 297000000, + { + { + /* 200mA and 180mA ?! */ + .ext_current_ctl = 12, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* 100 Ohm */ + .term_ctl = 1, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* Enable Internal Current Source */ + .int_current_src_en = 1, + }, + { + /* 200mA and 180mA ?! */ + .ext_current_ctl = 12, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* 100 Ohm */ + .term_ctl = 1, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* Enable Internal Current Source */ + .int_current_src_en = 1, + }, + { + /* 200mA and 180mA ?! */ + .ext_current_ctl = 12, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* 100 Ohm */ + .term_ctl = 1, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* Enable Internal Current Source */ + .int_current_src_en = 1, + }, + }, + { + /* 200mA and 180mA ?! 
*/ + .ext_current_ctl = 12, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* 100 Ohm */ + .term_ctl = 1, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* Enable Internal Current Source */ + .int_current_src_en = 1, + + /* Internal Current Source Half Swing Enable*/ + .int_current_src_hs_en = 1, + }, + }, + { + 297000001, 597000044, + { + { + /* 200mA */ + .ext_current_ctl = 8, + + /* Normal Slew Rate Control */ + .slew_rate_ctl = 1, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* 50 Ohms */ + .term_ctl = 3, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* Enable Internal Current Source */ + .int_current_src_en = 1, + + /* 200mA */ + .int_current_ctl = 8, + + /* 17.6 mA */ + .main_tap_current_select = 7, + }, + { + /* 200mA */ + .ext_current_ctl = 8, + + /* Normal Slew Rate Control */ + .slew_rate_ctl = 1, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* 50 Ohms */ + .term_ctl = 3, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* Enable Internal Current Source */ + .int_current_src_en = 1, + + /* 200mA */ + .int_current_ctl = 8, + + /* 17.6 mA */ + .main_tap_current_select = 7, + }, + { + /* 200mA */ + .ext_current_ctl = 8, + + /* Normal Slew Rate Control */ + .slew_rate_ctl = 1, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* 50 Ohms */ + .term_ctl = 3, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* Enable Internal Current Source */ + .int_current_src_en = 1, + + /* 200mA */ + .int_current_ctl = 8, + + /* 17.6 mA */ + .main_tap_current_select = 7, + }, + }, + { + /* 200mA */ + .ext_current_ctl = 8, + + /* Normal Slew Rate Control */ + .slew_rate_ctl = 1, + + /* 0.85V */ + .ldmos_bias_ctl = 1, + + /* External Current Source Half Swing Enable*/ + .ext_current_src_hs_en = 1, + + /* 50 Ohms */ + .term_ctl = 3, + + /* Enable External Current Source */ + .ext_current_src_en = 1, + + /* Enable Internal Current Source */ + .int_current_src_en = 1, + + /* 200mA */ + .int_current_ctl = 8, + + /* Internal Current Source Half Swing Enable*/ + .int_current_src_hs_en = 1, + + /* 17.6 mA */ + .main_tap_current_select = 7, + }, + }, +}; + +static const struct vc6_phy_settings * +vc6_phy_get_settings(unsigned long long tmds_rate) +{ + unsigned int count = ARRAY_SIZE(vc6_hdmi_phy_settings); + unsigned int i; + + for (i = 0; i < count; i++) { + const struct vc6_phy_settings *s = &vc6_hdmi_phy_settings[i]; + + if (tmds_rate >= s->min_rate && tmds_rate <= s->max_rate) + return s; + } + + /* + * If the pixel clock exceeds our max setting, try the max + * setting anyway. 
+ */ + return &vc6_hdmi_phy_settings[count - 1]; +} + +static const struct vc6_phy_lane_settings * +vc6_phy_get_channel_settings(enum vc4_hdmi_phy_channel chan, + unsigned long long tmds_rate) +{ + const struct vc6_phy_settings *settings = vc6_phy_get_settings(tmds_rate); + + if (chan == PHY_LANE_CK) + return &settings->clock; + + return &settings->channel[chan]; +} + +static void vc6_hdmi_reset_phy(struct vc4_hdmi *vc4_hdmi) +{ + lockdep_assert_held(&vc4_hdmi->hw_lock); + + HDMI_WRITE(HDMI_TX_PHY_RESET_CTL, 0); + HDMI_WRITE(HDMI_TX_PHY_POWERUP_CTL, 0); +} + +void vc6_hdmi_phy_init(struct vc4_hdmi *vc4_hdmi, + struct drm_connector_state *conn_state) +{ + const struct vc6_phy_lane_settings *chan0_settings; + const struct vc6_phy_lane_settings *chan1_settings; + const struct vc6_phy_lane_settings *chan2_settings; + const struct vc6_phy_lane_settings *clock_settings; + const struct vc4_hdmi_variant *variant = vc4_hdmi->variant; + unsigned long long pixel_freq = conn_state->hdmi.tmds_char_rate; + unsigned long long vco_freq; + unsigned char word_sel; + unsigned long flags; + unsigned int vco_div; + + vco_freq = vc6_phy_get_vco_freq(pixel_freq, &vco_div); + + spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); + + vc6_hdmi_reset_phy(vc4_hdmi); + + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_0, 0x810c6000); + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_1, 0x00b8c451); + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_2, 0x46402e31); + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_3, 0x00b8c005); + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_4, 0x42410261); + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_5, 0xcc021001); + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_6, 0xc8301c80); + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_7, 0xb0804444); + HDMI_WRITE(HDMI_TX_PHY_PLL_MISC_8, 0xf80f8000); + + HDMI_WRITE(HDMI_TX_PHY_PLL_REFCLK, + VC6_HDMI_TX_PHY_PLL_REFCLK_REFCLK_SEL_CMOS | + VC4_SET_FIELD(54, VC6_HDMI_TX_PHY_PLL_REFCLK_REFFRQ)); + + HDMI_WRITE(HDMI_TX_PHY_RESET_CTL, 0x7f); + + HDMI_WRITE(HDMI_RM_OFFSET, + VC4_HDMI_RM_OFFSET_ONLY | + VC4_SET_FIELD(phy_get_rm_offset(vco_freq), + VC4_HDMI_RM_OFFSET_OFFSET)); + + HDMI_WRITE(HDMI_TX_PHY_PLL_VCOCLK_DIV, + VC6_HDMI_TX_PHY_PLL_VCOCLK_DIV_VCODIV_EN | + VC4_SET_FIELD(vco_div, + VC6_HDMI_TX_PHY_PLL_VCOCLK_DIV_VCODIV)); + + HDMI_WRITE(HDMI_TX_PHY_PLL_CFG, + VC4_SET_FIELD(0, VC4_HDMI_TX_PHY_PLL_CFG_PDIV)); + + HDMI_WRITE(HDMI_TX_PHY_PLL_POST_KDIV, + VC4_SET_FIELD(2, VC6_HDMI_TX_PHY_PLL_POST_KDIV_CLK0_SEL) | + VC4_SET_FIELD(1, VC6_HDMI_TX_PHY_PLL_POST_KDIV_KDIV)); + + chan0_settings = + vc6_phy_get_channel_settings(variant->phy_lane_mapping[PHY_LANE_0], + pixel_freq); + HDMI_WRITE(HDMI_TX_PHY_CTL_0, + VC4_SET_FIELD(chan0_settings->ext_current_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_CTL) | + VC4_SET_FIELD(chan0_settings->ffe_enable, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_ENABLE) | + VC4_SET_FIELD(chan0_settings->slew_rate_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_RATE_CTL) | + VC4_SET_FIELD(chan0_settings->ffe_post_tap_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_POST_TAP_EN) | + VC4_SET_FIELD(chan0_settings->ldmos_bias_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_LDMOS_BIAS_CTL) | + VC4_SET_FIELD(chan0_settings->com_mode_ldmos_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_COM_MODE_LDMOS_EN) | + VC4_SET_FIELD(chan0_settings->edge_sel, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EDGE_SEL) | + VC4_SET_FIELD(chan0_settings->ext_current_src_hs_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_HS_EN) | + VC4_SET_FIELD(chan0_settings->term_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_TERM_CTL) | + VC4_SET_FIELD(chan0_settings->ext_current_src_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_EN) | + 
VC4_SET_FIELD(chan0_settings->int_current_src_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_EN) | + VC4_SET_FIELD(chan0_settings->int_current_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_CTL) | + VC4_SET_FIELD(chan0_settings->int_current_src_hs_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_HS_EN) | + VC4_SET_FIELD(chan0_settings->main_tap_current_select, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_MAIN_TAP_CURRENT_SELECT) | + VC4_SET_FIELD(chan0_settings->post_tap_current_select, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_POST_TAP_CURRENT_SELECT) | + VC4_SET_FIELD(chan0_settings->slew_ctl_slow_loading, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_LOADING) | + VC4_SET_FIELD(chan0_settings->slew_ctl_slow_driving, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_DRIVING) | + VC4_SET_FIELD(chan0_settings->ffe_pre_tap_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_PRE_TAP_EN)); + + chan1_settings = + vc6_phy_get_channel_settings(variant->phy_lane_mapping[PHY_LANE_1], + pixel_freq); + HDMI_WRITE(HDMI_TX_PHY_CTL_1, + VC4_SET_FIELD(chan1_settings->ext_current_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_CTL) | + VC4_SET_FIELD(chan1_settings->ffe_enable, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_ENABLE) | + VC4_SET_FIELD(chan1_settings->slew_rate_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_RATE_CTL) | + VC4_SET_FIELD(chan1_settings->ffe_post_tap_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_POST_TAP_EN) | + VC4_SET_FIELD(chan1_settings->ldmos_bias_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_LDMOS_BIAS_CTL) | + VC4_SET_FIELD(chan1_settings->com_mode_ldmos_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_COM_MODE_LDMOS_EN) | + VC4_SET_FIELD(chan1_settings->edge_sel, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EDGE_SEL) | + VC4_SET_FIELD(chan1_settings->ext_current_src_hs_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_HS_EN) | + VC4_SET_FIELD(chan1_settings->term_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_TERM_CTL) | + VC4_SET_FIELD(chan1_settings->ext_current_src_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_EN) | + VC4_SET_FIELD(chan1_settings->int_current_src_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_EN) | + VC4_SET_FIELD(chan1_settings->int_current_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_CTL) | + VC4_SET_FIELD(chan1_settings->int_current_src_hs_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_HS_EN) | + VC4_SET_FIELD(chan1_settings->main_tap_current_select, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_MAIN_TAP_CURRENT_SELECT) | + VC4_SET_FIELD(chan1_settings->post_tap_current_select, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_POST_TAP_CURRENT_SELECT) | + VC4_SET_FIELD(chan1_settings->slew_ctl_slow_loading, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_LOADING) | + VC4_SET_FIELD(chan1_settings->slew_ctl_slow_driving, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_DRIVING) | + VC4_SET_FIELD(chan1_settings->ffe_pre_tap_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_PRE_TAP_EN)); + + chan2_settings = + vc6_phy_get_channel_settings(variant->phy_lane_mapping[PHY_LANE_2], + pixel_freq); + HDMI_WRITE(HDMI_TX_PHY_CTL_2, + VC4_SET_FIELD(chan2_settings->ext_current_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_CTL) | + VC4_SET_FIELD(chan2_settings->ffe_enable, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_ENABLE) | + VC4_SET_FIELD(chan2_settings->slew_rate_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_RATE_CTL) | + VC4_SET_FIELD(chan2_settings->ffe_post_tap_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_POST_TAP_EN) | + VC4_SET_FIELD(chan2_settings->ldmos_bias_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_LDMOS_BIAS_CTL) | + VC4_SET_FIELD(chan2_settings->com_mode_ldmos_en, + 
VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_COM_MODE_LDMOS_EN) | + VC4_SET_FIELD(chan2_settings->edge_sel, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EDGE_SEL) | + VC4_SET_FIELD(chan2_settings->ext_current_src_hs_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_HS_EN) | + VC4_SET_FIELD(chan2_settings->term_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_TERM_CTL) | + VC4_SET_FIELD(chan2_settings->ext_current_src_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_EN) | + VC4_SET_FIELD(chan2_settings->int_current_src_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_EN) | + VC4_SET_FIELD(chan2_settings->int_current_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_CTL) | + VC4_SET_FIELD(chan2_settings->int_current_src_hs_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_HS_EN) | + VC4_SET_FIELD(chan2_settings->main_tap_current_select, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_MAIN_TAP_CURRENT_SELECT) | + VC4_SET_FIELD(chan2_settings->post_tap_current_select, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_POST_TAP_CURRENT_SELECT) | + VC4_SET_FIELD(chan2_settings->slew_ctl_slow_loading, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_LOADING) | + VC4_SET_FIELD(chan2_settings->slew_ctl_slow_driving, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_DRIVING) | + VC4_SET_FIELD(chan2_settings->ffe_pre_tap_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_PRE_TAP_EN)); + + clock_settings = + vc6_phy_get_channel_settings(variant->phy_lane_mapping[PHY_LANE_CK], + pixel_freq); + HDMI_WRITE(HDMI_TX_PHY_CTL_CK, + VC4_SET_FIELD(clock_settings->ext_current_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_CTL) | + VC4_SET_FIELD(clock_settings->ffe_enable, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_ENABLE) | + VC4_SET_FIELD(clock_settings->slew_rate_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_RATE_CTL) | + VC4_SET_FIELD(clock_settings->ffe_post_tap_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_POST_TAP_EN) | + VC4_SET_FIELD(clock_settings->ldmos_bias_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_LDMOS_BIAS_CTL) | + VC4_SET_FIELD(clock_settings->com_mode_ldmos_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_COM_MODE_LDMOS_EN) | + VC4_SET_FIELD(clock_settings->edge_sel, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EDGE_SEL) | + VC4_SET_FIELD(clock_settings->ext_current_src_hs_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_HS_EN) | + VC4_SET_FIELD(clock_settings->term_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_TERM_CTL) | + VC4_SET_FIELD(clock_settings->ext_current_src_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_EXT_CURRENT_SRC_EN) | + VC4_SET_FIELD(clock_settings->int_current_src_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_EN) | + VC4_SET_FIELD(clock_settings->int_current_ctl, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_CTL) | + VC4_SET_FIELD(clock_settings->int_current_src_hs_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_INT_CURRENT_SRC_HS_EN) | + VC4_SET_FIELD(clock_settings->main_tap_current_select, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_MAIN_TAP_CURRENT_SELECT) | + VC4_SET_FIELD(clock_settings->post_tap_current_select, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_POST_TAP_CURRENT_SELECT) | + VC4_SET_FIELD(clock_settings->slew_ctl_slow_loading, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_LOADING) | + VC4_SET_FIELD(clock_settings->slew_ctl_slow_driving, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_SLEW_CTL_SLOW_DRIVING) | + VC4_SET_FIELD(clock_settings->ffe_pre_tap_en, + VC6_HDMI_TX_PHY_HDMI_CTRL_CHX_FFE_PRE_TAP_EN)); + + if (pixel_freq >= 340000000) + word_sel = 3; + else + word_sel = 0; + HDMI_WRITE(HDMI_TX_PHY_TMDS_CLK_WORD_SEL, word_sel); + + HDMI_WRITE(HDMI_TX_PHY_POWERUP_CTL, + VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_BG_PWRUP | + 
VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_LDO_PWRUP | + VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_BIAS_PWRUP | + VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_TX_CK_PWRUP | + VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_TX_2_PWRUP | + VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_TX_1_PWRUP | + VC6_HDMI_TX_PHY_HDMI_POWERUP_CTL_TX_0_PWRUP); + + HDMI_WRITE(HDMI_TX_PHY_PLL_POWERUP_CTL, + VC6_HDMI_TX_PHY_PLL_POWERUP_CTL_PLL_PWRUP); + + HDMI_WRITE(HDMI_TX_PHY_PLL_RESET_CTL, + HDMI_READ(HDMI_TX_PHY_PLL_RESET_CTL) & + ~VC6_HDMI_TX_PHY_PLL_RESET_CTL_PLL_RESETB); + + HDMI_WRITE(HDMI_TX_PHY_PLL_RESET_CTL, + HDMI_READ(HDMI_TX_PHY_PLL_RESET_CTL) | + VC6_HDMI_TX_PHY_PLL_RESET_CTL_PLL_RESETB); + + spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); +} + +void vc6_hdmi_phy_disable(struct vc4_hdmi *vc4_hdmi) +{ +} diff --git a/drivers/gpu/drm/vc4/vc4_hdmi_regs.h b/drivers/gpu/drm/vc4/vc4_hdmi_regs.h index 68455ce513e7..59bfd69f54d9 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi_regs.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi_regs.h @@ -111,13 +111,30 @@ enum vc4_hdmi_field { HDMI_TX_PHY_CTL_1, HDMI_TX_PHY_CTL_2, HDMI_TX_PHY_CTL_3, + HDMI_TX_PHY_CTL_CK, HDMI_TX_PHY_PLL_CALIBRATION_CONFIG_1, HDMI_TX_PHY_PLL_CALIBRATION_CONFIG_2, HDMI_TX_PHY_PLL_CALIBRATION_CONFIG_4, HDMI_TX_PHY_PLL_CFG, + HDMI_TX_PHY_PLL_CFG_PDIV, HDMI_TX_PHY_PLL_CTL_0, HDMI_TX_PHY_PLL_CTL_1, + HDMI_TX_PHY_PLL_MISC_0, + HDMI_TX_PHY_PLL_MISC_1, + HDMI_TX_PHY_PLL_MISC_2, + HDMI_TX_PHY_PLL_MISC_3, + HDMI_TX_PHY_PLL_MISC_4, + HDMI_TX_PHY_PLL_MISC_5, + HDMI_TX_PHY_PLL_MISC_6, + HDMI_TX_PHY_PLL_MISC_7, + HDMI_TX_PHY_PLL_MISC_8, + HDMI_TX_PHY_PLL_POST_KDIV, + HDMI_TX_PHY_PLL_POWERUP_CTL, + HDMI_TX_PHY_PLL_REFCLK, + HDMI_TX_PHY_PLL_RESET_CTL, + HDMI_TX_PHY_PLL_VCOCLK_DIV, HDMI_TX_PHY_POWERDOWN_CTL, + HDMI_TX_PHY_POWERUP_CTL, HDMI_TX_PHY_RESET_CTL, HDMI_TX_PHY_TMDS_CLK_WORD_SEL, HDMI_VEC_INTERFACE_CFG, @@ -411,6 +428,206 @@ static const struct vc4_hdmi_register __maybe_unused vc5_hdmi_hdmi1_fields[] = { VC5_CSC_REG(HDMI_CSC_CHANNEL_CTL, 0x02c), }; +static const struct vc4_hdmi_register __maybe_unused vc6_hdmi_hdmi0_fields[] = { + VC4_HD_REG(HDMI_DVP_CTL, 0x0000), + VC4_HD_REG(HDMI_MAI_CTL, 0x0010), + VC4_HD_REG(HDMI_MAI_THR, 0x0014), + VC4_HD_REG(HDMI_MAI_FMT, 0x0018), + VC4_HD_REG(HDMI_MAI_DATA, 0x001c), + VC4_HD_REG(HDMI_MAI_SMP, 0x0020), + VC4_HD_REG(HDMI_VID_CTL, 0x0044), + VC4_HD_REG(HDMI_FRAME_COUNT, 0x0060), + + VC4_HDMI_REG(HDMI_FIFO_CTL, 0x07c), + VC4_HDMI_REG(HDMI_AUDIO_PACKET_CONFIG, 0x0c0), + VC4_HDMI_REG(HDMI_RAM_PACKET_CONFIG, 0x0c4), + VC4_HDMI_REG(HDMI_RAM_PACKET_STATUS, 0x0cc), + VC4_HDMI_REG(HDMI_CRP_CFG, 0x0d0), + VC4_HDMI_REG(HDMI_CTS_0, 0x0d4), + VC4_HDMI_REG(HDMI_CTS_1, 0x0d8), + VC4_HDMI_REG(HDMI_SCHEDULER_CONTROL, 0x0e8), + VC4_HDMI_REG(HDMI_HORZA, 0x0ec), + VC4_HDMI_REG(HDMI_HORZB, 0x0f0), + VC4_HDMI_REG(HDMI_VERTA0, 0x0f4), + VC4_HDMI_REG(HDMI_VERTB0, 0x0f8), + VC4_HDMI_REG(HDMI_VERTA1, 0x100), + VC4_HDMI_REG(HDMI_VERTB1, 0x104), + VC4_HDMI_REG(HDMI_MISC_CONTROL, 0x114), + VC4_HDMI_REG(HDMI_MAI_CHANNEL_MAP, 0x0a4), + VC4_HDMI_REG(HDMI_MAI_CONFIG, 0x0a8), + VC4_HDMI_REG(HDMI_FORMAT_DET_1, 0x148), + VC4_HDMI_REG(HDMI_FORMAT_DET_2, 0x14c), + VC4_HDMI_REG(HDMI_FORMAT_DET_3, 0x150), + VC4_HDMI_REG(HDMI_FORMAT_DET_4, 0x158), + VC4_HDMI_REG(HDMI_FORMAT_DET_5, 0x15c), + VC4_HDMI_REG(HDMI_FORMAT_DET_6, 0x160), + VC4_HDMI_REG(HDMI_FORMAT_DET_7, 0x164), + VC4_HDMI_REG(HDMI_FORMAT_DET_8, 0x168), + VC4_HDMI_REG(HDMI_FORMAT_DET_9, 0x16c), + VC4_HDMI_REG(HDMI_FORMAT_DET_10, 0x170), + VC4_HDMI_REG(HDMI_DEEP_COLOR_CONFIG_1, 0x18c), + VC4_HDMI_REG(HDMI_GCP_CONFIG, 0x194), + VC4_HDMI_REG(HDMI_GCP_WORD_1, 
0x198), + VC4_HDMI_REG(HDMI_HOTPLUG, 0x1c8), + VC4_HDMI_REG(HDMI_SCRAMBLER_CTL, 0x1e4), + + VC5_DVP_REG(HDMI_CLOCK_STOP, 0x0bc), + VC5_DVP_REG(HDMI_VEC_INTERFACE_CFG, 0x0f0), + VC5_DVP_REG(HDMI_VEC_INTERFACE_XBAR, 0x0f4), + + VC5_PHY_REG(HDMI_TX_PHY_RESET_CTL, 0x000), + VC5_PHY_REG(HDMI_TX_PHY_POWERUP_CTL, 0x004), + VC5_PHY_REG(HDMI_TX_PHY_CTL_0, 0x008), + VC5_PHY_REG(HDMI_TX_PHY_CTL_1, 0x00c), + VC5_PHY_REG(HDMI_TX_PHY_CTL_2, 0x010), + VC5_PHY_REG(HDMI_TX_PHY_CTL_CK, 0x014), + VC5_PHY_REG(HDMI_TX_PHY_PLL_REFCLK, 0x01c), + VC5_PHY_REG(HDMI_TX_PHY_PLL_POST_KDIV, 0x028), + VC5_PHY_REG(HDMI_TX_PHY_PLL_VCOCLK_DIV, 0x02c), + VC5_PHY_REG(HDMI_TX_PHY_PLL_CFG, 0x044), + VC5_PHY_REG(HDMI_TX_PHY_TMDS_CLK_WORD_SEL, 0x054), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_0, 0x060), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_1, 0x064), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_2, 0x068), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_3, 0x06c), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_4, 0x070), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_5, 0x074), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_6, 0x078), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_7, 0x07c), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_8, 0x080), + VC5_PHY_REG(HDMI_TX_PHY_PLL_RESET_CTL, 0x190), + VC5_PHY_REG(HDMI_TX_PHY_PLL_POWERUP_CTL, 0x194), + + VC5_RM_REG(HDMI_RM_CONTROL, 0x000), + VC5_RM_REG(HDMI_RM_OFFSET, 0x018), + VC5_RM_REG(HDMI_RM_FORMAT, 0x01c), + + VC5_RAM_REG(HDMI_RAM_PACKET_START, 0x000), + + VC5_CEC_REG(HDMI_CEC_CNTRL_1, 0x010), + VC5_CEC_REG(HDMI_CEC_CNTRL_2, 0x014), + VC5_CEC_REG(HDMI_CEC_CNTRL_3, 0x018), + VC5_CEC_REG(HDMI_CEC_CNTRL_4, 0x01c), + VC5_CEC_REG(HDMI_CEC_CNTRL_5, 0x020), + VC5_CEC_REG(HDMI_CEC_TX_DATA_1, 0x028), + VC5_CEC_REG(HDMI_CEC_TX_DATA_2, 0x02c), + VC5_CEC_REG(HDMI_CEC_TX_DATA_3, 0x030), + VC5_CEC_REG(HDMI_CEC_TX_DATA_4, 0x034), + VC5_CEC_REG(HDMI_CEC_RX_DATA_1, 0x038), + VC5_CEC_REG(HDMI_CEC_RX_DATA_2, 0x03c), + VC5_CEC_REG(HDMI_CEC_RX_DATA_3, 0x040), + VC5_CEC_REG(HDMI_CEC_RX_DATA_4, 0x044), + + VC5_CSC_REG(HDMI_CSC_CTL, 0x000), + VC5_CSC_REG(HDMI_CSC_12_11, 0x004), + VC5_CSC_REG(HDMI_CSC_14_13, 0x008), + VC5_CSC_REG(HDMI_CSC_22_21, 0x00c), + VC5_CSC_REG(HDMI_CSC_24_23, 0x010), + VC5_CSC_REG(HDMI_CSC_32_31, 0x014), + VC5_CSC_REG(HDMI_CSC_34_33, 0x018), + VC5_CSC_REG(HDMI_CSC_CHANNEL_CTL, 0x02c), +}; + +static const struct vc4_hdmi_register __maybe_unused vc6_hdmi_hdmi1_fields[] = { + VC4_HD_REG(HDMI_DVP_CTL, 0x0000), + VC4_HD_REG(HDMI_MAI_CTL, 0x0030), + VC4_HD_REG(HDMI_MAI_THR, 0x0034), + VC4_HD_REG(HDMI_MAI_FMT, 0x0038), + VC4_HD_REG(HDMI_MAI_DATA, 0x003c), + VC4_HD_REG(HDMI_MAI_SMP, 0x0040), + VC4_HD_REG(HDMI_VID_CTL, 0x0048), + VC4_HD_REG(HDMI_FRAME_COUNT, 0x0064), + + VC4_HDMI_REG(HDMI_FIFO_CTL, 0x07c), + VC4_HDMI_REG(HDMI_AUDIO_PACKET_CONFIG, 0x0c0), + VC4_HDMI_REG(HDMI_RAM_PACKET_CONFIG, 0x0c4), + VC4_HDMI_REG(HDMI_RAM_PACKET_STATUS, 0x0cc), + VC4_HDMI_REG(HDMI_CRP_CFG, 0x0d0), + VC4_HDMI_REG(HDMI_CTS_0, 0x0d4), + VC4_HDMI_REG(HDMI_CTS_1, 0x0d8), + VC4_HDMI_REG(HDMI_SCHEDULER_CONTROL, 0x0e8), + VC4_HDMI_REG(HDMI_HORZA, 0x0ec), + VC4_HDMI_REG(HDMI_HORZB, 0x0f0), + VC4_HDMI_REG(HDMI_VERTA0, 0x0f4), + VC4_HDMI_REG(HDMI_VERTB0, 0x0f8), + VC4_HDMI_REG(HDMI_VERTA1, 0x100), + VC4_HDMI_REG(HDMI_VERTB1, 0x104), + VC4_HDMI_REG(HDMI_MISC_CONTROL, 0x114), + VC4_HDMI_REG(HDMI_MAI_CHANNEL_MAP, 0x0a4), + VC4_HDMI_REG(HDMI_MAI_CONFIG, 0x0a8), + VC4_HDMI_REG(HDMI_FORMAT_DET_1, 0x148), + VC4_HDMI_REG(HDMI_FORMAT_DET_2, 0x14c), + VC4_HDMI_REG(HDMI_FORMAT_DET_3, 0x150), + VC4_HDMI_REG(HDMI_FORMAT_DET_4, 0x158), + VC4_HDMI_REG(HDMI_FORMAT_DET_5, 0x15c), + VC4_HDMI_REG(HDMI_FORMAT_DET_6, 
0x160), + VC4_HDMI_REG(HDMI_FORMAT_DET_7, 0x164), + VC4_HDMI_REG(HDMI_FORMAT_DET_8, 0x168), + VC4_HDMI_REG(HDMI_FORMAT_DET_9, 0x16c), + VC4_HDMI_REG(HDMI_FORMAT_DET_10, 0x170), + VC4_HDMI_REG(HDMI_DEEP_COLOR_CONFIG_1, 0x18c), + VC4_HDMI_REG(HDMI_GCP_CONFIG, 0x194), + VC4_HDMI_REG(HDMI_GCP_WORD_1, 0x198), + VC4_HDMI_REG(HDMI_HOTPLUG, 0x1c8), + VC4_HDMI_REG(HDMI_SCRAMBLER_CTL, 0x1e4), + + VC5_DVP_REG(HDMI_CLOCK_STOP, 0x0bc), + VC5_DVP_REG(HDMI_VEC_INTERFACE_CFG, 0x0f0), + VC5_DVP_REG(HDMI_VEC_INTERFACE_XBAR, 0x0f4), + + VC5_PHY_REG(HDMI_TX_PHY_RESET_CTL, 0x000), + VC5_PHY_REG(HDMI_TX_PHY_POWERUP_CTL, 0x004), + VC5_PHY_REG(HDMI_TX_PHY_CTL_0, 0x008), + VC5_PHY_REG(HDMI_TX_PHY_CTL_1, 0x00c), + VC5_PHY_REG(HDMI_TX_PHY_CTL_2, 0x010), + VC5_PHY_REG(HDMI_TX_PHY_CTL_CK, 0x014), + VC5_PHY_REG(HDMI_TX_PHY_PLL_REFCLK, 0x01c), + VC5_PHY_REG(HDMI_TX_PHY_PLL_POST_KDIV, 0x028), + VC5_PHY_REG(HDMI_TX_PHY_PLL_VCOCLK_DIV, 0x02c), + VC5_PHY_REG(HDMI_TX_PHY_PLL_CFG, 0x044), + VC5_PHY_REG(HDMI_TX_PHY_TMDS_CLK_WORD_SEL, 0x054), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_0, 0x060), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_1, 0x064), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_2, 0x068), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_3, 0x06c), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_4, 0x070), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_5, 0x074), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_6, 0x078), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_7, 0x07c), + VC5_PHY_REG(HDMI_TX_PHY_PLL_MISC_8, 0x080), + VC5_PHY_REG(HDMI_TX_PHY_PLL_RESET_CTL, 0x190), + VC5_PHY_REG(HDMI_TX_PHY_PLL_POWERUP_CTL, 0x194), + + VC5_RM_REG(HDMI_RM_CONTROL, 0x000), + VC5_RM_REG(HDMI_RM_OFFSET, 0x018), + VC5_RM_REG(HDMI_RM_FORMAT, 0x01c), + + VC5_RAM_REG(HDMI_RAM_PACKET_START, 0x000), + + VC5_CEC_REG(HDMI_CEC_CNTRL_1, 0x010), + VC5_CEC_REG(HDMI_CEC_CNTRL_2, 0x014), + VC5_CEC_REG(HDMI_CEC_CNTRL_3, 0x018), + VC5_CEC_REG(HDMI_CEC_CNTRL_4, 0x01c), + VC5_CEC_REG(HDMI_CEC_CNTRL_5, 0x020), + VC5_CEC_REG(HDMI_CEC_TX_DATA_1, 0x028), + VC5_CEC_REG(HDMI_CEC_TX_DATA_2, 0x02c), + VC5_CEC_REG(HDMI_CEC_TX_DATA_3, 0x030), + VC5_CEC_REG(HDMI_CEC_TX_DATA_4, 0x034), + VC5_CEC_REG(HDMI_CEC_RX_DATA_1, 0x038), + VC5_CEC_REG(HDMI_CEC_RX_DATA_2, 0x03c), + VC5_CEC_REG(HDMI_CEC_RX_DATA_3, 0x040), + VC5_CEC_REG(HDMI_CEC_RX_DATA_4, 0x044), + + VC5_CSC_REG(HDMI_CSC_CTL, 0x000), + VC5_CSC_REG(HDMI_CSC_12_11, 0x004), + VC5_CSC_REG(HDMI_CSC_14_13, 0x008), + VC5_CSC_REG(HDMI_CSC_22_21, 0x00c), + VC5_CSC_REG(HDMI_CSC_24_23, 0x010), + VC5_CSC_REG(HDMI_CSC_32_31, 0x014), + VC5_CSC_REG(HDMI_CSC_34_33, 0x018), + VC5_CSC_REG(HDMI_CSC_CHANNEL_CTL, 0x02c), +}; + static inline void __iomem *__vc4_hdmi_get_field_base(struct vc4_hdmi *hdmi, enum vc4_hdmi_regs reg) diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 70623e6b91e9..4811d794001f 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -67,6 +67,140 @@ static const struct debugfs_reg32 vc4_hvs_regs[] = { VC4_REG32(SCALER_OLEDCOEF2), }; +static const struct debugfs_reg32 vc6_hvs_regs[] = { + VC4_REG32(SCALER6_VERSION), + VC4_REG32(SCALER6_CXM_SIZE), + VC4_REG32(SCALER6_LBM_SIZE), + VC4_REG32(SCALER6_UBM_SIZE), + VC4_REG32(SCALER6_COBA_SIZE), + VC4_REG32(SCALER6_COB_SIZE), + VC4_REG32(SCALER6_CONTROL), + VC4_REG32(SCALER6_FETCHER_STATUS), + VC4_REG32(SCALER6_FETCH_STATUS), + VC4_REG32(SCALER6_HANDLE_ERROR), + VC4_REG32(SCALER6_DISP0_CTRL0), + VC4_REG32(SCALER6_DISP0_CTRL1), + VC4_REG32(SCALER6_DISP0_BGND), + VC4_REG32(SCALER6_DISP0_LPTRS), + VC4_REG32(SCALER6_DISP0_COB), + VC4_REG32(SCALER6_DISP0_STATUS), + VC4_REG32(SCALER6_DISP0_DL), + 
VC4_REG32(SCALER6_DISP0_RUN), + VC4_REG32(SCALER6_DISP1_CTRL0), + VC4_REG32(SCALER6_DISP1_CTRL1), + VC4_REG32(SCALER6_DISP1_BGND), + VC4_REG32(SCALER6_DISP1_LPTRS), + VC4_REG32(SCALER6_DISP1_COB), + VC4_REG32(SCALER6_DISP1_STATUS), + VC4_REG32(SCALER6_DISP1_DL), + VC4_REG32(SCALER6_DISP1_RUN), + VC4_REG32(SCALER6_DISP2_CTRL0), + VC4_REG32(SCALER6_DISP2_CTRL1), + VC4_REG32(SCALER6_DISP2_BGND), + VC4_REG32(SCALER6_DISP2_LPTRS), + VC4_REG32(SCALER6_DISP2_COB), + VC4_REG32(SCALER6_DISP2_STATUS), + VC4_REG32(SCALER6_DISP2_DL), + VC4_REG32(SCALER6_DISP2_RUN), + VC4_REG32(SCALER6_EOLN), + VC4_REG32(SCALER6_DL_STATUS), + VC4_REG32(SCALER6_BFG_MISC), + VC4_REG32(SCALER6_QOS0), + VC4_REG32(SCALER6_PROF0), + VC4_REG32(SCALER6_QOS1), + VC4_REG32(SCALER6_PROF1), + VC4_REG32(SCALER6_QOS2), + VC4_REG32(SCALER6_PROF2), + VC4_REG32(SCALER6_PRI_MAP0), + VC4_REG32(SCALER6_PRI_MAP1), + VC4_REG32(SCALER6_HISTCTRL), + VC4_REG32(SCALER6_HISTBIN0), + VC4_REG32(SCALER6_HISTBIN1), + VC4_REG32(SCALER6_HISTBIN2), + VC4_REG32(SCALER6_HISTBIN3), + VC4_REG32(SCALER6_HISTBIN4), + VC4_REG32(SCALER6_HISTBIN5), + VC4_REG32(SCALER6_HISTBIN6), + VC4_REG32(SCALER6_HISTBIN7), + VC4_REG32(SCALER6_HDR_CFG_REMAP), + VC4_REG32(SCALER6_COL_SPACE), + VC4_REG32(SCALER6_HVS_ID), + VC4_REG32(SCALER6_CFC1), + VC4_REG32(SCALER6_DISP_UPM_ISO0), + VC4_REG32(SCALER6_DISP_UPM_ISO1), + VC4_REG32(SCALER6_DISP_UPM_ISO2), + VC4_REG32(SCALER6_DISP_LBM_ISO0), + VC4_REG32(SCALER6_DISP_LBM_ISO1), + VC4_REG32(SCALER6_DISP_LBM_ISO2), + VC4_REG32(SCALER6_DISP_COB_ISO0), + VC4_REG32(SCALER6_DISP_COB_ISO1), + VC4_REG32(SCALER6_DISP_COB_ISO2), + VC4_REG32(SCALER6_BAD_COB), + VC4_REG32(SCALER6_BAD_LBM), + VC4_REG32(SCALER6_BAD_UPM), + VC4_REG32(SCALER6_BAD_AXI), +}; + +static const struct debugfs_reg32 vc6_d_hvs_regs[] = { + VC4_REG32(SCALER6D_VERSION), + VC4_REG32(SCALER6D_CXM_SIZE), + VC4_REG32(SCALER6D_LBM_SIZE), + VC4_REG32(SCALER6D_UBM_SIZE), + VC4_REG32(SCALER6D_COBA_SIZE), + VC4_REG32(SCALER6D_COB_SIZE), + VC4_REG32(SCALER6D_CONTROL), + VC4_REG32(SCALER6D_FETCHER_STATUS), + VC4_REG32(SCALER6D_FETCH_STATUS), + VC4_REG32(SCALER6D_HANDLE_ERROR), + VC4_REG32(SCALER6D_DISP0_CTRL0), + VC4_REG32(SCALER6D_DISP0_CTRL1), + VC4_REG32(SCALER6D_DISP0_BGND0), + VC4_REG32(SCALER6D_DISP0_BGND1), + VC4_REG32(SCALER6D_DISP0_LPTRS), + VC4_REG32(SCALER6D_DISP0_COB), + VC4_REG32(SCALER6D_DISP0_STATUS), + VC4_REG32(SCALER6D_DISP0_DL), + VC4_REG32(SCALER6D_DISP0_RUN), + VC4_REG32(SCALER6D_DISP1_CTRL0), + VC4_REG32(SCALER6D_DISP1_CTRL1), + VC4_REG32(SCALER6D_DISP1_BGND0), + VC4_REG32(SCALER6D_DISP1_BGND1), + VC4_REG32(SCALER6D_DISP1_LPTRS), + VC4_REG32(SCALER6D_DISP1_COB), + VC4_REG32(SCALER6D_DISP1_STATUS), + VC4_REG32(SCALER6D_DISP1_DL), + VC4_REG32(SCALER6D_DISP1_RUN), + VC4_REG32(SCALER6D_DISP2_CTRL0), + VC4_REG32(SCALER6D_DISP2_CTRL1), + VC4_REG32(SCALER6D_DISP2_BGND0), + VC4_REG32(SCALER6D_DISP2_BGND1), + VC4_REG32(SCALER6D_DISP2_LPTRS), + VC4_REG32(SCALER6D_DISP2_COB), + VC4_REG32(SCALER6D_DISP2_STATUS), + VC4_REG32(SCALER6D_DISP2_DL), + VC4_REG32(SCALER6D_DISP2_RUN), + VC4_REG32(SCALER6D_EOLN), + VC4_REG32(SCALER6D_DL_STATUS), + VC4_REG32(SCALER6D_QOS0), + VC4_REG32(SCALER6D_PROF0), + VC4_REG32(SCALER6D_QOS1), + VC4_REG32(SCALER6D_PROF1), + VC4_REG32(SCALER6D_QOS2), + VC4_REG32(SCALER6D_PROF2), + VC4_REG32(SCALER6D_PRI_MAP0), + VC4_REG32(SCALER6D_PRI_MAP1), + VC4_REG32(SCALER6D_HISTCTRL), + VC4_REG32(SCALER6D_HISTBIN0), + VC4_REG32(SCALER6D_HISTBIN1), + VC4_REG32(SCALER6D_HISTBIN2), + VC4_REG32(SCALER6D_HISTBIN3), + VC4_REG32(SCALER6D_HISTBIN4), + 
VC4_REG32(SCALER6D_HISTBIN5), + VC4_REG32(SCALER6D_HISTBIN6), + VC4_REG32(SCALER6D_HISTBIN7), + VC4_REG32(SCALER6D_HVS_ID), +}; + void vc4_hvs_dump_state(struct vc4_hvs *hvs) { struct drm_device *drm = &hvs->vc4->base; @@ -145,6 +279,76 @@ static int vc4_hvs_debugfs_dlist(struct seq_file *m, void *data) return 0; } +static int vc6_hvs_debugfs_dlist(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; + struct drm_printer p = drm_seq_file_printer(m); + unsigned int dlist_mem_size = hvs->dlist_mem_size; + unsigned int next_entry_start; + unsigned int i; + + for (i = 0; i < SCALER_CHANNELS_COUNT; i++) { + unsigned int active_dlist, dispstat; + unsigned int j; + + dispstat = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(i)), + SCALER6_DISPX_STATUS_MODE); + if (dispstat == SCALER6_DISPX_STATUS_MODE_DISABLED || + dispstat == SCALER6_DISPX_STATUS_MODE_EOF) { + drm_printf(&p, "HVS chan %u disabled\n", i); + continue; + } + + drm_printf(&p, "HVS chan %u:\n", i); + + active_dlist = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_DL(i)), + SCALER6_DISPX_DL_LACT); + next_entry_start = 0; + + for (j = active_dlist; j < dlist_mem_size; j++) { + u32 dlist_word; + + dlist_word = readl((u32 __iomem *)vc4->hvs->dlist + j); + drm_printf(&p, "dlist: %02d: 0x%08x\n", j, + dlist_word); + if (!next_entry_start || + next_entry_start == j) { + if (dlist_word & SCALER_CTL0_END) + break; + next_entry_start = j + + VC4_GET_FIELD(dlist_word, + SCALER_CTL0_SIZE); + } + } + } + + return 0; +} + +static int vc6_hvs_debugfs_upm_allocs(struct seq_file *m, void *data) +{ + struct drm_debugfs_entry *entry = m->private; + struct drm_device *dev = entry->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_hvs *hvs = vc4->hvs; + struct drm_printer p = drm_seq_file_printer(m); + struct vc4_upm_refcounts *refcount; + unsigned int i; + + drm_printf(&p, "UPM Handles:\n"); + for (i = 1; i <= VC4_NUM_UPM_HANDLES; i++) { + refcount = &hvs->upm_refcounts[i]; + drm_printf(&p, "handle %u: refcount %u, size %zu [%08llx + %08llx]\n", + i, refcount_read(&refcount->refcount), refcount->size, + refcount->upm.start, refcount->upm.size); + } + + return 0; +} + /* The filter kernel is composed of dwords each containing 3 9-bit * signed integers packed next to each other. 
*/ @@ -215,12 +419,15 @@ static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs, static void vc4_hvs_lut_load(struct vc4_hvs *hvs, struct vc4_crtc *vc4_crtc) { - struct drm_device *drm = &hvs->vc4->base; + struct vc4_dev *vc4 = hvs->vc4; + struct drm_device *drm = &vc4->base; struct drm_crtc *crtc = &vc4_crtc->base; struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state); int idx; u32 i; + WARN_ON_ONCE(vc4->gen > VC4_GEN_5); + if (!drm_dev_enter(drm, &idx)) return; @@ -265,25 +472,56 @@ static void vc4_hvs_update_gamma_lut(struct vc4_hvs *hvs, u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo) { - struct drm_device *drm = &hvs->vc4->base; + struct vc4_dev *vc4 = hvs->vc4; + struct drm_device *drm = &vc4->base; u8 field = 0; int idx; + WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); + if (!drm_dev_enter(drm, &idx)) return 0; - switch (fifo) { - case 0: - field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), - SCALER_DISPSTAT1_FRCNT0); + switch (vc4->gen) { + case VC4_GEN_6_C: + case VC4_GEN_6_D: + field = VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(fifo)), + SCALER6_DISPX_STATUS_FRCNT); break; - case 1: - field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), - SCALER_DISPSTAT1_FRCNT1); + case VC4_GEN_5: + switch (fifo) { + case 0: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), + SCALER5_DISPSTAT1_FRCNT0); + break; + case 1: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), + SCALER5_DISPSTAT1_FRCNT1); + break; + case 2: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2), + SCALER5_DISPSTAT2_FRCNT2); + break; + } + break; + case VC4_GEN_4: + switch (fifo) { + case 0: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), + SCALER_DISPSTAT1_FRCNT0); + break; + case 1: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT1), + SCALER_DISPSTAT1_FRCNT1); + break; + case 2: + field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2), + SCALER_DISPSTAT2_FRCNT2); + break; + } break; - case 2: - field = VC4_GET_FIELD(HVS_READ(SCALER_DISPSTAT2), - SCALER_DISPSTAT2_FRCNT2); + default: + drm_err(drm, "Unknown VC4 generation: %d", vc4->gen); break; } @@ -297,6 +535,8 @@ int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output) u32 reg; int ret; + WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); + switch (vc4->gen) { case VC4_GEN_4: return output; @@ -352,6 +592,24 @@ int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output) return -EPIPE; } + case VC4_GEN_6_C: + case VC4_GEN_6_D: + switch (output) { + case 0: + return 0; + + case 2: + return 2; + + case 1: + case 3: + case 4: + return 1; + + default: + return -EPIPE; + } + default: return -EPIPE; } @@ -370,6 +628,8 @@ static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, u32 dispctrl; int idx; + WARN_ON_ONCE(vc4->gen > VC4_GEN_5); + if (!drm_dev_enter(drm, &idx)) return -ENODEV; @@ -420,11 +680,50 @@ static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, return 0; } -void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) +static int vc6_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc, + struct drm_display_mode *mode, bool oneshot) { - struct drm_device *drm = &hvs->vc4->base; + struct vc4_dev *vc4 = hvs->vc4; + struct drm_device *drm = &vc4->base; + struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state); + unsigned int chan = vc4_crtc_state->assigned_channel; + bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE; + u32 disp_ctrl1; + int idx; + + WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C); + + if (!drm_dev_enter(drm, &idx)) + return -ENODEV; + + 
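	/*
	 * Note (not part of the patch): the BCM2712 channel bring-up that
	 * follows is a three-step sequence.  First assert the RESET bit in
	 * SCALER6_DISPX_CTRL0, then update the INTLACE flag in
	 * SCALER6_DISPX_CTRL1 to match the mode, and finally rewrite
	 * SCALER6_DISPX_CTRL0 with the enable bit, the frame width and line
	 * count (both programmed as the value minus one) and, when requested,
	 * the one-shot bit.
	 */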
HVS_WRITE(SCALER6_DISPX_CTRL0(chan), SCALER6_DISPX_CTRL0_RESET); + + disp_ctrl1 = HVS_READ(SCALER6_DISPX_CTRL1(chan)); + disp_ctrl1 &= ~SCALER6_DISPX_CTRL1_INTLACE; + HVS_WRITE(SCALER6_DISPX_CTRL1(chan), + disp_ctrl1 | (interlace ? SCALER6_DISPX_CTRL1_INTLACE : 0)); + + HVS_WRITE(SCALER6_DISPX_CTRL0(chan), + SCALER6_DISPX_CTRL0_ENB | + VC4_SET_FIELD(mode->hdisplay - 1, + SCALER6_DISPX_CTRL0_FWIDTH) | + (oneshot ? SCALER6_DISPX_CTRL0_ONESHOT : 0) | + VC4_SET_FIELD(mode->vdisplay - 1, + SCALER6_DISPX_CTRL0_LINES)); + + drm_dev_exit(idx); + + return 0; +} + +static void __vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) +{ + struct vc4_dev *vc4 = hvs->vc4; + struct drm_device *drm = &vc4->base; int idx; + WARN_ON_ONCE(vc4->gen > VC4_GEN_5); + if (!drm_dev_enter(drm, &idx)) return; @@ -449,6 +748,44 @@ out: drm_dev_exit(idx); } +static void __vc6_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) +{ + struct vc4_dev *vc4 = hvs->vc4; + struct drm_device *drm = &vc4->base; + int idx; + + WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C); + + if (!drm_dev_enter(drm, &idx)) + return; + + if (!(HVS_READ(SCALER6_DISPX_CTRL0(chan)) & SCALER6_DISPX_CTRL0_ENB)) + goto out; + + HVS_WRITE(SCALER6_DISPX_CTRL0(chan), + HVS_READ(SCALER6_DISPX_CTRL0(chan)) | SCALER6_DISPX_CTRL0_RESET); + + HVS_WRITE(SCALER6_DISPX_CTRL0(chan), + HVS_READ(SCALER6_DISPX_CTRL0(chan)) & ~SCALER6_DISPX_CTRL0_ENB); + + WARN_ON_ONCE(VC4_GET_FIELD(HVS_READ(SCALER6_DISPX_STATUS(chan)), + SCALER6_DISPX_STATUS_MODE) != + SCALER6_DISPX_STATUS_MODE_DISABLED); + +out: + drm_dev_exit(idx); +} + +void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int chan) +{ + struct vc4_dev *vc4 = hvs->vc4; + + if (vc4->gen >= VC4_GEN_6_C) + __vc6_hvs_stop_channel(hvs, chan); + else + __vc4_hvs_stop_channel(hvs, chan); +} + int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); @@ -505,8 +842,13 @@ static void vc4_hvs_install_dlist(struct drm_crtc *crtc) if (!drm_dev_enter(dev, &idx)) return; - HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel), - vc4_state->mm.start); + if (vc4->gen >= VC4_GEN_6_C) + HVS_WRITE(SCALER6_DISPX_LPTRS(vc4_state->assigned_channel), + VC4_SET_FIELD(vc4_state->mm.start, + SCALER6_DISPX_LPTRS_HEADE)); + else + HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel), + vc4_state->mm.start); drm_dev_exit(idx); } @@ -561,7 +903,11 @@ void vc4_hvs_atomic_enable(struct drm_crtc *crtc, vc4_hvs_install_dlist(crtc); vc4_hvs_update_dlist(crtc); - vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot); + + if (vc4->gen >= VC4_GEN_6_C) + vc6_hvs_init_channel(vc4->hvs, crtc, mode, oneshot); + else + vc4_hvs_init_channel(vc4->hvs, crtc, mode, oneshot); } void vc4_hvs_atomic_disable(struct drm_crtc *crtc, @@ -590,13 +936,15 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc, struct drm_plane *plane; struct vc4_plane_state *vc4_plane_state; bool debug_dump_regs = false; - bool enable_bg_fill = false; + bool enable_bg_fill = true; u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start; u32 __iomem *dlist_next = dlist_start; unsigned int zpos = 0; bool found = false; int idx; + WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); + if (!drm_dev_enter(dev, &idx)) { vc4_crtc_send_vblank(crtc); return; @@ -645,13 +993,26 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc, WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size); - if (enable_bg_fill) + if (vc4->gen >= VC4_GEN_6_C) { /* This sets a black background color fill, as is 
the case * with other DRM drivers. */ + if (enable_bg_fill) + HVS_WRITE(SCALER6_DISPX_CTRL1(channel), + HVS_READ(SCALER6_DISPX_CTRL1(channel)) | + SCALER6_DISPX_CTRL1_BGENB); + else + HVS_WRITE(SCALER6_DISPX_CTRL1(channel), + HVS_READ(SCALER6_DISPX_CTRL1(channel)) & + ~SCALER6_DISPX_CTRL1_BGENB); + } else { + /* we can actually run with a lower core clock when background + * fill is enabled on VC4_GEN_5 so leave it enabled always. + */ HVS_WRITE(SCALER_DISPBKGNDX(channel), HVS_READ(SCALER_DISPBKGNDX(channel)) | SCALER_DISPBKGND_FILL); + } /* Only update DISPLIST if the CRTC was already running and is not * being disabled. @@ -668,6 +1029,8 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc, if (crtc->state->color_mgmt_changed) { u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(channel)); + WARN_ON_ONCE(vc4->gen > VC4_GEN_5); + if (crtc->state->gamma_lut) { vc4_hvs_update_gamma_lut(hvs, vc4_crtc); dispbkgndx |= SCALER_DISPBKGND_GAMMA; @@ -697,6 +1060,8 @@ void vc4_hvs_mask_underrun(struct vc4_hvs *hvs, int channel) u32 dispctrl; int idx; + WARN_ON(vc4->gen > VC4_GEN_5); + if (!drm_dev_enter(drm, &idx)) return; @@ -717,6 +1082,8 @@ void vc4_hvs_unmask_underrun(struct vc4_hvs *hvs, int channel) u32 dispctrl; int idx; + WARN_ON(vc4->gen > VC4_GEN_5); + if (!drm_dev_enter(drm, &idx)) return; @@ -751,6 +1118,8 @@ static irqreturn_t vc4_hvs_irq_handler(int irq, void *data) u32 status; u32 dspeislur; + WARN_ON(vc4->gen > VC4_GEN_5); + /* * NOTE: We don't need to protect the register access using * drm_dev_enter() there because the interrupt handler lifetime @@ -802,7 +1171,12 @@ int vc4_hvs_debugfs_init(struct drm_minor *minor) minor->debugfs_root, &vc4->load_tracker_enabled); - drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL); + if (vc4->gen >= VC4_GEN_6_C) { + drm_debugfs_add_file(drm, "hvs_dlists", vc6_hvs_debugfs_dlist, NULL); + drm_debugfs_add_file(drm, "hvs_upm", vc6_hvs_debugfs_upm_allocs, NULL); + } else { + drm_debugfs_add_file(drm, "hvs_dlists", vc4_hvs_debugfs_dlist, NULL); + } drm_debugfs_add_file(drm, "hvs_underrun", vc4_hvs_debugfs_underrun, NULL); @@ -817,6 +1191,10 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4, { struct drm_device *drm = &vc4->base; struct vc4_hvs *hvs; + unsigned int dlist_start; + size_t dlist_size; + size_t lbm_size; + unsigned int i; hvs = drmm_kzalloc(drm, sizeof(*hvs), GFP_KERNEL); if (!hvs) @@ -828,27 +1206,94 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4, spin_lock_init(&hvs->mm_lock); - /* Set up the HVS display list memory manager. We never - * overwrite the setup from the bootloader (just 128b out of - * our 16K), since we don't want to scramble the screen when - * transitioning from the firmware's boot setup to runtime. - */ - hvs->dlist_mem_size = (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END; - drm_mm_init(&hvs->dlist_mm, - HVS_BOOTLOADER_DLIST_END, - hvs->dlist_mem_size); + switch (vc4->gen) { + case VC4_GEN_4: + case VC4_GEN_5: + /* Set up the HVS display list memory manager. We never + * overwrite the setup from the bootloader (just 128b + * out of our 16K), since we don't want to scramble the + * screen when transitioning from the firmware's boot + * setup to runtime. + */ + dlist_start = HVS_BOOTLOADER_DLIST_END; + dlist_size = (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END; + break; + + case VC4_GEN_6_C: + case VC4_GEN_6_D: + dlist_start = HVS_BOOTLOADER_DLIST_END; + + /* + * If we are running a test, it means that we can't + * access a register. Use a plausible size then. 
+ */ + if (!kunit_get_current_test()) + dlist_size = HVS_READ(SCALER6_CXM_SIZE); + else + dlist_size = 4096; + + for (i = 0; i < VC4_NUM_UPM_HANDLES; i++) { + refcount_set(&hvs->upm_refcounts[i].refcount, 0); + hvs->upm_refcounts[i].hvs = hvs; + } + + break; + + default: + drm_err(drm, "Unknown VC4 generation: %d", vc4->gen); + return ERR_PTR(-ENODEV); + } + + drm_mm_init(&hvs->dlist_mm, dlist_start, dlist_size); + + hvs->dlist_mem_size = dlist_size; /* Set up the HVS LBM memory manager. We could have some more * complicated data structure that allowed reuse of LBM areas * between planes when they don't overlap on the screen, but * for now we just allocate globally. */ - if (vc4->gen == VC4_GEN_4) + + switch (vc4->gen) { + case VC4_GEN_4: /* 48k words of 2x12-bit pixels */ - drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024); - else + lbm_size = 48 * SZ_1K; + break; + + case VC4_GEN_5: /* 60k words of 4x12-bit pixels */ - drm_mm_init(&hvs->lbm_mm, 0, 60 * 1024); + lbm_size = 60 * SZ_1K; + break; + + case VC4_GEN_6_C: + case VC4_GEN_6_D: + /* + * If we are running a test, it means that we can't + * access a register. Use a plausible size then. + */ + lbm_size = 1024; + break; + + default: + drm_err(drm, "Unknown VC4 generation: %d", vc4->gen); + return ERR_PTR(-ENODEV); + } + + drm_mm_init(&hvs->lbm_mm, 0, lbm_size); + + if (vc4->gen >= VC4_GEN_6_C) { + ida_init(&hvs->upm_handles); + + /* + * NOTE: On BCM2712, the size can also be read through + * the SCALER_UBM_SIZE register. We would need to do a + * register access though, which we can't do with kunit + * that also uses this function to create its mock + * device. + */ + drm_mm_init(&hvs->upm_mm, 0, 1024 * HVS_UBM_WORD_SIZE); + } + vc4->hvs = hvs; @@ -945,10 +1390,150 @@ static int vc4_hvs_hw_init(struct vc4_hvs *hvs) return 0; } +#define CFC1_N_NL_CSC_CTRL(x) (0xa000 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C00(x) (0xa008 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C01(x) (0xa00c + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C02(x) (0xa010 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C03(x) (0xa014 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C04(x) (0xa018 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C10(x) (0xa01c + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C11(x) (0xa020 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C12(x) (0xa024 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C13(x) (0xa028 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C14(x) (0xa02c + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C20(x) (0xa030 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C21(x) (0xa034 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C22(x) (0xa038 + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C23(x) (0xa03c + ((x) * 0x3000)) +#define CFC1_N_MA_CSC_COEFF_C24(x) (0xa040 + ((x) * 0x3000)) + +#define SCALER_PI_CMP_CSC_RED0(x) (0x200 + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_RED1(x) (0x204 + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_RED_CLAMP(x) (0x208 + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_CFG(x) (0x20c + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_GREEN0(x) (0x210 + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_GREEN1(x) (0x214 + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_GREEN_CLAMP(x) (0x218 + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_BLUE0(x) (0x220 + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_BLUE1(x) (0x224 + ((x) * 0x40)) +#define SCALER_PI_CMP_CSC_BLUE_CLAMP(x) (0x228 + ((x) * 0x40)) + +/* 4 S2.22 multiplication factors, and 1 S9.15 addititive element for each of 3 + * output components + */ +struct vc6_csc_coeff_entry { + u32 csc[3][5]; +}; 
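An illustrative aside, not part of the patch: the coefficient words in the csc_coeffs[][] table that follows are plain fixed-point values in the S2.22 (multipliers) and S9.15 (additive terms) layouts described in the comment above. The helper below, with a made-up name, sketches how such constants could be generated on the host; it assumes ordinary C with <math.h> rather than kernel code:

    #include <math.h>
    #include <stdint.h>

    /* Scale by 2^frac_bits, round, and wrap negatives to two's complement. */
    static inline uint32_t csc_fixed(double v, unsigned int frac_bits)
    {
            return (uint32_t)(int32_t)lround(v * (double)(1u << frac_bits));
    }

For example, csc_fixed(1.0, 22) yields 0x00400000, the full-range luma multiplier used below, and csc_fixed(255.0 / 219.0, 22) comes out close to 0x004A8542, the familiar limited-range luma scale factor; the additive column uses 15 fractional bits, so 0x8000 there corresponds to 1.0.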
+ +static const struct vc6_csc_coeff_entry csc_coeffs[2][3] = { + [DRM_COLOR_YCBCR_LIMITED_RANGE] = { + [DRM_COLOR_YCBCR_BT601] = { + .csc = { + { 0x004A8542, 0x0, 0x0066254A, 0x0, 0xFF908A0D }, + { 0x004A8542, 0xFFE6ED5D, 0xFFCBF856, 0x0, 0x0043C9A3 }, + { 0x004A8542, 0x00811A54, 0x0, 0x0, 0xFF759502 } + } + }, + [DRM_COLOR_YCBCR_BT709] = { + .csc = { + { 0x004A8542, 0x0, 0x0072BC44, 0x0, 0xFF83F312 }, + { 0x004A8542, 0xFFF25A22, 0xFFDDE4D0, 0x0, 0x00267064 }, + { 0x004A8542, 0x00873197, 0x0, 0x0, 0xFF6F7DC0 } + } + }, + [DRM_COLOR_YCBCR_BT2020] = { + .csc = { + { 0x004A8542, 0x0, 0x006B4A17, 0x0, 0xFF8B653F }, + { 0x004A8542, 0xFFF402D9, 0xFFDDE4D0, 0x0, 0x0024C7AE }, + { 0x004A8542, 0x008912CC, 0x0, 0x0, 0xFF6D9C8B } + } + } + }, + [DRM_COLOR_YCBCR_FULL_RANGE] = { + [DRM_COLOR_YCBCR_BT601] = { + .csc = { + { 0x00400000, 0x0, 0x0059BA5E, 0x0, 0xFFA645A1 }, + { 0x00400000, 0xFFE9F9AC, 0xFFD24B97, 0x0, 0x0043BABB }, + { 0x00400000, 0x00716872, 0x0, 0x0, 0xFF8E978D } + } + }, + [DRM_COLOR_YCBCR_BT709] = { + .csc = { + { 0x00400000, 0x0, 0x0064C985, 0x0, 0xFF9B367A }, + { 0x00400000, 0xFFF402E1, 0xFFE20A40, 0x0, 0x0029F2DE }, + { 0x00400000, 0x0076C226, 0x0, 0x0, 0xFF893DD9 } + } + }, + [DRM_COLOR_YCBCR_BT2020] = { + .csc = { + { 0x00400000, 0x0, 0x005E3F14, 0x0, 0xFFA1C0EB }, + { 0x00400000, 0xFFF577F6, 0xFFDB580F, 0x0, 0x002F2FFA }, + { 0x00400000, 0x007868DB, 0x0, 0x0, 0xFF879724 } + } + } + } +}; + +static int vc6_hvs_hw_init(struct vc4_hvs *hvs) +{ + const struct vc6_csc_coeff_entry *coeffs; + unsigned int i; + + HVS_WRITE(SCALER6_CONTROL, + SCALER6_CONTROL_HVS_EN | + VC4_SET_FIELD(8, SCALER6_CONTROL_PF_LINES) | + VC4_SET_FIELD(15, SCALER6_CONTROL_MAX_REQS)); + + /* Set HVS arbiter priority to max */ + HVS_WRITE(SCALER6(PRI_MAP0), 0xffffffff); + HVS_WRITE(SCALER6(PRI_MAP1), 0xffffffff); + + if (hvs->vc4->gen == VC4_GEN_6_C) { + for (i = 0; i < 6; i++) { + coeffs = &csc_coeffs[i / 3][i % 3]; + + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C00(i), coeffs->csc[0][0]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C01(i), coeffs->csc[0][1]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C02(i), coeffs->csc[0][2]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C03(i), coeffs->csc[0][3]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C04(i), coeffs->csc[0][4]); + + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C10(i), coeffs->csc[1][0]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C11(i), coeffs->csc[1][1]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C12(i), coeffs->csc[1][2]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C13(i), coeffs->csc[1][3]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C14(i), coeffs->csc[1][4]); + + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C20(i), coeffs->csc[2][0]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C21(i), coeffs->csc[2][1]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C22(i), coeffs->csc[2][2]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C23(i), coeffs->csc[2][3]); + HVS_WRITE(CFC1_N_MA_CSC_COEFF_C24(i), coeffs->csc[2][4]); + + HVS_WRITE(CFC1_N_NL_CSC_CTRL(i), BIT(15)); + } + } else { + for (i = 0; i < 8; i++) { + HVS_WRITE(SCALER_PI_CMP_CSC_RED0(i), 0x1f002566); + HVS_WRITE(SCALER_PI_CMP_CSC_RED1(i), 0x3994); + HVS_WRITE(SCALER_PI_CMP_CSC_RED_CLAMP(i), 0xfff00000); + HVS_WRITE(SCALER_PI_CMP_CSC_CFG(i), 0x1); + HVS_WRITE(SCALER_PI_CMP_CSC_GREEN0(i), 0x18002566); + HVS_WRITE(SCALER_PI_CMP_CSC_GREEN1(i), 0xf927eee2); + HVS_WRITE(SCALER_PI_CMP_CSC_GREEN_CLAMP(i), 0xfff00000); + HVS_WRITE(SCALER_PI_CMP_CSC_BLUE0(i), 0x18002566); + HVS_WRITE(SCALER_PI_CMP_CSC_BLUE1(i), 0x43d80000); + HVS_WRITE(SCALER_PI_CMP_CSC_BLUE_CLAMP(i), 0xfff00000); + } + } + + return 0; +} + static int vc4_hvs_cob_init(struct vc4_hvs *hvs) { struct vc4_dev *vc4 = 
hvs->vc4; - u32 reg, top; + u32 reg, top, base; /* * Recompute Composite Output Buffer (COB) allocations for the @@ -1009,6 +1594,32 @@ static int vc4_hvs_cob_init(struct vc4_hvs *hvs) HVS_WRITE(SCALER_DISPBASE0, reg); break; + case VC4_GEN_6_C: + case VC4_GEN_6_D: + #define VC6_COB_LINE_WIDTH 3840 + #define VC6_COB_NUM_LINES 4 + base = 0; + top = 3840; + + HVS_WRITE(SCALER6_DISPX_COB(2), + VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) | + VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE)); + + base = top + 16; + top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES; + + HVS_WRITE(SCALER6_DISPX_COB(1), + VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) | + VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE)); + + base = top + 16; + top += VC6_COB_LINE_WIDTH * VC6_COB_NUM_LINES; + + HVS_WRITE(SCALER6_DISPX_COB(0), + VC4_SET_FIELD(top, SCALER6_DISPX_COB_TOP) | + VC4_SET_FIELD(base, SCALER6_DISPX_COB_BASE)); + break; + default: return -EINVAL; } @@ -1034,10 +1645,23 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) return PTR_ERR(hvs); hvs->regset.base = hvs->regs; - hvs->regset.regs = vc4_hvs_regs; - hvs->regset.nregs = ARRAY_SIZE(vc4_hvs_regs); - if (vc4->gen == VC4_GEN_5) { + if (vc4->gen == VC4_GEN_6_C) { + hvs->regset.regs = vc6_hvs_regs; + hvs->regset.nregs = ARRAY_SIZE(vc6_hvs_regs); + + if (VC4_GET_FIELD(HVS_READ(SCALER6_VERSION), SCALER6_VERSION) == + SCALER6_VERSION_D0) { + vc4->gen = VC4_GEN_6_D; + hvs->regset.regs = vc6_d_hvs_regs; + hvs->regset.nregs = ARRAY_SIZE(vc6_d_hvs_regs); + } + } else { + hvs->regset.regs = vc4_hvs_regs; + hvs->regset.nregs = ARRAY_SIZE(vc4_hvs_regs); + } + + if (vc4->gen >= VC4_GEN_5) { struct rpi_firmware *firmware; struct device_node *node; unsigned int max_rate; @@ -1051,12 +1675,20 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) if (!firmware) return -EPROBE_DEFER; - hvs->core_clk = devm_clk_get(&pdev->dev, NULL); + hvs->core_clk = devm_clk_get(&pdev->dev, + (vc4->gen >= VC4_GEN_6_C) ? "core" : NULL); if (IS_ERR(hvs->core_clk)) { dev_err(&pdev->dev, "Couldn't get core clock\n"); return PTR_ERR(hvs->core_clk); } + hvs->disp_clk = devm_clk_get(&pdev->dev, + (vc4->gen >= VC4_GEN_6_C) ? 
"disp" : NULL); + if (IS_ERR(hvs->disp_clk)) { + dev_err(&pdev->dev, "Couldn't get disp clock\n"); + return PTR_ERR(hvs->disp_clk); + } + max_rate = rpi_firmware_clk_get_max_rate(firmware, RPI_FIRMWARE_CORE_CLK_ID); rpi_firmware_put(firmware); @@ -1073,14 +1705,23 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) dev_err(&pdev->dev, "Couldn't enable the core clock\n"); return ret; } + + ret = clk_prepare_enable(hvs->disp_clk); + if (ret) { + dev_err(&pdev->dev, "Couldn't enable the disp clock\n"); + return ret; + } } - if (vc4->gen == VC4_GEN_4) - hvs->dlist = hvs->regs + SCALER_DLIST_START; - else + if (vc4->gen >= VC4_GEN_5) hvs->dlist = hvs->regs + SCALER5_DLIST_START; + else + hvs->dlist = hvs->regs + SCALER_DLIST_START; - ret = vc4_hvs_hw_init(hvs); + if (vc4->gen >= VC4_GEN_6_C) + ret = vc6_hvs_hw_init(hvs); + else + ret = vc4_hvs_hw_init(hvs); if (ret) return ret; @@ -1097,10 +1738,12 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data) if (ret) return ret; - ret = devm_request_irq(dev, platform_get_irq(pdev, 0), - vc4_hvs_irq_handler, 0, "vc4 hvs", drm); - if (ret) - return ret; + if (vc4->gen < VC4_GEN_6_C) { + ret = devm_request_irq(dev, platform_get_irq(pdev, 0), + vc4_hvs_irq_handler, 0, "vc4 hvs", drm); + if (ret) + return ret; + } return 0; } @@ -1125,6 +1768,7 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master, drm_mm_remove_node(node); drm_mm_takedown(&vc4->hvs->lbm_mm); + clk_disable_unprepare(hvs->disp_clk); clk_disable_unprepare(hvs->core_clk); vc4->hvs = NULL; @@ -1147,6 +1791,7 @@ static void vc4_hvs_dev_remove(struct platform_device *pdev) static const struct of_device_id vc4_hvs_dt_match[] = { { .compatible = "brcm,bcm2711-hvs" }, + { .compatible = "brcm,bcm2712-hvs" }, { .compatible = "brcm,bcm2835-hvs" }, {} }; diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 58bbb9efc2df..f5b167417428 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -138,6 +138,8 @@ vc4_ctm_commit(struct vc4_dev *vc4, struct drm_atomic_state *state) struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(vc4->ctm_manager.state); struct drm_color_ctm *ctm = ctm_state->ctm; + WARN_ON_ONCE(vc4->gen > VC4_GEN_5); + if (ctm_state->fifo) { HVS_WRITE(SCALER_OLEDCOEF2, VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[0]), @@ -213,6 +215,8 @@ static void vc4_hvs_pv_muxing_commit(struct vc4_dev *vc4, struct drm_crtc *crtc; unsigned int i; + WARN_ON_ONCE(vc4->gen != VC4_GEN_4); + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state); @@ -256,6 +260,8 @@ static void vc5_hvs_pv_muxing_commit(struct vc4_dev *vc4, unsigned int i; u32 reg; + WARN_ON_ONCE(vc4->gen != VC4_GEN_5); + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state); struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); @@ -320,17 +326,62 @@ static void vc5_hvs_pv_muxing_commit(struct vc4_dev *vc4, } } +static void vc6_hvs_pv_muxing_commit(struct vc4_dev *vc4, + struct drm_atomic_state *state) +{ + struct vc4_hvs *hvs = vc4->hvs; + struct drm_crtc_state *crtc_state; + struct drm_crtc *crtc; + unsigned int i; + + WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D); + + for_each_new_crtc_in_state(state, crtc, crtc_state, i) { + struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state); + struct vc4_encoder 
*vc4_encoder; + struct drm_encoder *encoder; + unsigned char mux; + u32 reg; + + if (!vc4_state->update_muxing) + continue; + + if (vc4_state->assigned_channel != 1) + continue; + + encoder = vc4_get_crtc_encoder(crtc, crtc_state); + vc4_encoder = to_vc4_encoder(encoder); + switch (vc4_encoder->type) { + case VC4_ENCODER_TYPE_HDMI1: + mux = 0; + break; + + case VC4_ENCODER_TYPE_TXP1: + mux = 2; + break; + + default: + drm_err(&vc4->base, "Unhandled encoder type for PV muxing %d", + vc4_encoder->type); + mux = 0; + break; + } + + reg = HVS_READ(SCALER6_CONTROL); + HVS_WRITE(SCALER6_CONTROL, + (reg & ~SCALER6_CONTROL_DSP1_TARGET_MASK) | + VC4_SET_FIELD(mux, SCALER6_CONTROL_DSP1_TARGET)); + } +} + static void vc4_atomic_commit_tail(struct drm_atomic_state *state) { struct drm_device *dev = state->dev; struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_hvs *hvs = vc4->hvs; - struct drm_crtc_state *new_crtc_state; struct vc4_hvs_state *new_hvs_state; - struct drm_crtc *crtc; struct vc4_hvs_state *old_hvs_state; unsigned int channel; - int i; old_hvs_state = vc4_hvs_get_old_global_state(state); if (WARN_ON(IS_ERR(old_hvs_state))) @@ -340,14 +391,20 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) if (WARN_ON(IS_ERR(new_hvs_state))) return; - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { - struct vc4_crtc_state *vc4_crtc_state; + if (vc4->gen < VC4_GEN_6_C) { + struct drm_crtc_state *new_crtc_state; + struct drm_crtc *crtc; + int i; - if (!new_crtc_state->commit) - continue; + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + struct vc4_crtc_state *vc4_crtc_state; + + if (!new_crtc_state->commit) + continue; - vc4_crtc_state = to_vc4_crtc_state(new_crtc_state); - vc4_hvs_mask_underrun(hvs, vc4_crtc_state->assigned_channel); + vc4_crtc_state = to_vc4_crtc_state(new_crtc_state); + vc4_hvs_mask_underrun(hvs, vc4_crtc_state->assigned_channel); + } } for (channel = 0; channel < HVS_NUM_CHANNELS; channel++) { @@ -382,16 +439,32 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) * modeset. */ WARN_ON(clk_set_min_rate(hvs->core_clk, core_rate)); + WARN_ON(clk_set_min_rate(hvs->disp_clk, core_rate)); } drm_atomic_helper_commit_modeset_disables(dev, state); - vc4_ctm_commit(vc4, state); + if (vc4->gen <= VC4_GEN_5) + vc4_ctm_commit(vc4, state); - if (vc4->gen == VC4_GEN_5) - vc5_hvs_pv_muxing_commit(vc4, state); - else + switch (vc4->gen) { + case VC4_GEN_4: vc4_hvs_pv_muxing_commit(vc4, state); + break; + + case VC4_GEN_5: + vc5_hvs_pv_muxing_commit(vc4, state); + break; + + case VC4_GEN_6_C: + case VC4_GEN_6_D: + vc6_hvs_pv_muxing_commit(vc4, state); + break; + + default: + drm_err(dev, "Unknown VC4 generation: %d", vc4->gen); + break; + } drm_atomic_helper_commit_planes(dev, state, DRM_PLANE_COMMIT_ACTIVE_ONLY); @@ -418,6 +491,7 @@ static void vc4_atomic_commit_tail(struct drm_atomic_state *state) * requirements. 
*/ WARN_ON(clk_set_min_rate(hvs->core_clk, core_rate)); + WARN_ON(clk_set_min_rate(hvs->disp_clk, core_rate)); drm_dbg(dev, "Core clock actual rate: %lu Hz\n", clk_get_rate(hvs->core_clk)); @@ -1056,7 +1130,10 @@ int vc4_kms_load(struct drm_device *dev) return ret; } - if (vc4->gen == VC4_GEN_5) { + if (vc4->gen >= VC4_GEN_6_C) { + dev->mode_config.max_width = 8192; + dev->mode_config.max_height = 8192; + } else if (vc4->gen >= VC4_GEN_5) { dev->mode_config.max_width = 7680; dev->mode_config.max_height = 7680; } else { diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index ba6e86d62a77..d608860d525f 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -278,7 +278,10 @@ static bool plane_enabled(struct drm_plane_state *state) static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) { + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); + struct vc4_hvs *hvs = vc4->hvs; struct vc4_plane_state *vc4_state; + unsigned int i; if (WARN_ON(!plane->state)) return NULL; @@ -288,6 +291,12 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane return NULL; memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); + + for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { + if (vc4_state->upm_handle[i]) + refcount_inc(&hvs->upm_refcounts[vc4_state->upm_handle[i]].refcount); + } + vc4_state->dlist_initialized = 0; __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); @@ -306,18 +315,47 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane return &vc4_state->base; } +static void vc4_plane_release_upm_ida(struct vc4_hvs *hvs, unsigned int upm_handle) +{ + struct vc4_upm_refcounts *refcount = &hvs->upm_refcounts[upm_handle]; + unsigned long irqflags; + + spin_lock_irqsave(&hvs->mm_lock, irqflags); + drm_mm_remove_node(&refcount->upm); + spin_unlock_irqrestore(&hvs->mm_lock, irqflags); + refcount->upm.start = 0; + refcount->upm.size = 0; + refcount->size = 0; + + ida_free(&hvs->upm_handles, upm_handle); +} + static void vc4_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { struct vc4_dev *vc4 = to_vc4_dev(plane->dev); + struct vc4_hvs *hvs = vc4->hvs; struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + unsigned int i; if (drm_mm_node_allocated(&vc4_state->lbm)) { unsigned long irqflags; - spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); + spin_lock_irqsave(&hvs->mm_lock, irqflags); drm_mm_remove_node(&vc4_state->lbm); - spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); + spin_unlock_irqrestore(&hvs->mm_lock, irqflags); + } + + for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { + struct vc4_upm_refcounts *refcount; + + if (!vc4_state->upm_handle[i]) + continue; + + refcount = &hvs->upm_refcounts[vc4_state->upm_handle[i]]; + + if (refcount_dec_and_test(&refcount->refcount)) + vc4_plane_release_upm_ida(hvs, vc4_state->upm_handle[i]); } kfree(vc4_state->dlist); @@ -330,7 +368,10 @@ static void vc4_plane_reset(struct drm_plane *plane) { struct vc4_plane_state *vc4_state; - WARN_ON(plane->state); + if (plane->state) + __drm_atomic_helper_plane_destroy_state(plane->state); + + kfree(plane->state); vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); if (!vc4_state) @@ -528,8 +569,11 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) { + struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev); u32 scale, recip; + 
WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); + scale = src / dst; /* The specs note that while the reciprocal would be defined @@ -538,6 +582,11 @@ static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) recip = ~0 / scale; vc4_dlist_write(vc4_state, + /* + * The BCM2712 lacks BIT(31) compared to + * the previous generations, but we don't use + * it. + */ VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); vc4_dlist_write(vc4_state, @@ -550,10 +599,13 @@ static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst, u32 xy, int channel) { + struct vc4_dev *vc4 = to_vc4_dev(vc4_state->base.plane->dev); u32 scale = src / dst; s32 offset, offset2; s32 phase; + + WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); + /* * Start the phase at 1/2 pixel from the 1st pixel at src_x. * 1/4 pixel for YUV. */ @@ -598,10 +650,15 @@ static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst, vc4_dlist_write(vc4_state, SCALER_PPF_AGC | VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | + /* + * The register layout documentation is slightly + * different for setting up the phase in the BCM2712, + * but they seem equivalent. + */ VC4_SET_FIELD(phase, SCALER_PPF_IPHASE)); } -static u32 vc4_lbm_size(struct drm_plane_state *state) +static u32 __vc4_lbm_size(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); @@ -649,11 +706,139 @@ static u32 vc4_lbm_size(struct drm_plane_state *state) return lbm; } +static unsigned int vc4_lbm_words_per_component(const struct drm_plane_state *state, + unsigned int channel) +{ + const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + + switch (vc4_state->y_scaling[channel]) { + case VC4_SCALING_PPF: + return 4; + + case VC4_SCALING_TPZ: + return 2; + + default: + return 0; + } +} + +static unsigned int vc4_lbm_components(const struct drm_plane_state *state, + unsigned int channel) +{ + const struct drm_format_info *info = state->fb->format; + const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + + if (vc4_state->y_scaling[channel] == VC4_SCALING_NONE) + return 0; + + if (info->is_yuv) + return channel ? 2 : 1; + + if (info->has_alpha) + return 4; + + return 3; +} + +static unsigned int vc4_lbm_channel_size(const struct drm_plane_state *state, + unsigned int channel) +{ + const struct drm_format_info *info = state->fb->format; + const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + unsigned int channels_scaled = 0; + unsigned int components, words, wpc; + unsigned int width, lines; + unsigned int i; + + /* LBM is meant to use the smaller of source or dest width, but there + * is an issue with UV scaling where the size required for the second + * channel is based on the source width only.
+ */ + if (info->hsub > 1 && channel == 1) + width = state->src_w >> 16; + else + width = min(state->src_w >> 16, state->crtc_w); + width = round_up(width / info->hsub, 4); + + wpc = vc4_lbm_words_per_component(state, channel); + if (!wpc) + return 0; + + components = vc4_lbm_components(state, channel); + if (!components) + return 0; + + if (state->alpha != DRM_BLEND_ALPHA_OPAQUE && info->has_alpha) + components -= 1; + + words = width * wpc * components; + + lines = DIV_ROUND_UP(words, 128 / info->hsub); + + for (i = 0; i < 2; i++) + if (vc4_state->y_scaling[channel] != VC4_SCALING_NONE) + channels_scaled++; + + if (channels_scaled == 1) + lines = lines / 2; + + return lines; +} + +static unsigned int __vc6_lbm_size(const struct drm_plane_state *state) +{ + const struct drm_format_info *info = state->fb->format; + + if (info->hsub > 1) + return max(vc4_lbm_channel_size(state, 0), + vc4_lbm_channel_size(state, 1)); + else + return vc4_lbm_channel_size(state, 0); +} + +static u32 vc4_lbm_size(struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); + + /* LBM is not needed when there's no vertical scaling. */ + if (vc4_state->y_scaling[0] == VC4_SCALING_NONE && + vc4_state->y_scaling[1] == VC4_SCALING_NONE) + return 0; + + if (vc4->gen >= VC4_GEN_6_C) + return __vc6_lbm_size(state); + else + return __vc4_lbm_size(state); +} + +static size_t vc6_upm_size(const struct drm_plane_state *state, + unsigned int plane) +{ + const struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + unsigned int stride = state->fb->pitches[plane]; + + /* + * TODO: This only works for raster formats, and is sub-optimal + * for buffers with a stride aligned on 32 bytes. + */ + unsigned int words_per_line = (stride + 62) / 32; + unsigned int fetch_region_size = words_per_line * 32; + unsigned int buffer_lines = 2 << vc4_state->upm_buffer_lines; + unsigned int buffer_size = fetch_region_size * buffer_lines; + + return ALIGN(buffer_size, HVS_UBM_WORD_SIZE); +} + static void vc4_write_scaling_parameters(struct drm_plane_state *state, int channel) { + struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + WARN_ON_ONCE(vc4->gen > VC4_GEN_6_D); + /* Ch0 H-PPF Word 0: Scaling Parameters */ if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { vc4_write_ppf(vc4_state, vc4_state->src_w[channel], @@ -750,6 +935,10 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state) if (!lbm_size) return 0; + /* + * NOTE: BCM2712 doesn't need to be aligned, since the size + * returned by vc4_lbm_size() is in words already. 
+ */ if (vc4->gen == VC4_GEN_5) lbm_size = ALIGN(lbm_size, 64); else if (vc4->gen == VC4_GEN_4) @@ -787,6 +976,108 @@ static int vc4_plane_allocate_lbm(struct drm_plane_state *state) return 0; } +static int vc6_plane_allocate_upm(struct drm_plane_state *state) +{ + const struct drm_format_info *info = state->fb->format; + struct drm_device *drm = state->plane->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_hvs *hvs = vc4->hvs; + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + unsigned int i; + int ret; + + WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C); + + vc4_state->upm_buffer_lines = SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES; + + for (i = 0; i < info->num_planes; i++) { + struct vc4_upm_refcounts *refcount; + int upm_handle; + unsigned long irqflags; + size_t upm_size; + + upm_size = vc6_upm_size(state, i); + if (!upm_size) + return -EINVAL; + upm_handle = vc4_state->upm_handle[i]; + + if (upm_handle && + hvs->upm_refcounts[upm_handle].size == upm_size) { + /* Allocation is the same size as the previous user of + * the plane. Keep the allocation. + */ + vc4_state->upm_handle[i] = upm_handle; + } else { + if (upm_handle && + refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) { + vc4_plane_release_upm_ida(hvs, upm_handle); + vc4_state->upm_handle[i] = 0; + } + + upm_handle = ida_alloc_range(&hvs->upm_handles, 1, + VC4_NUM_UPM_HANDLES, + GFP_KERNEL); + if (upm_handle < 0) { + drm_dbg(drm, "Out of upm_handles\n"); + return upm_handle; + } + vc4_state->upm_handle[i] = upm_handle; + + refcount = &hvs->upm_refcounts[upm_handle]; + refcount_set(&refcount->refcount, 1); + refcount->size = upm_size; + + spin_lock_irqsave(&hvs->mm_lock, irqflags); + ret = drm_mm_insert_node_generic(&hvs->upm_mm, + &refcount->upm, + upm_size, HVS_UBM_WORD_SIZE, + 0, 0); + spin_unlock_irqrestore(&hvs->mm_lock, irqflags); + if (ret) { + drm_err(drm, "Failed to allocate UPM entry: %d\n", ret); + refcount_set(&refcount->refcount, 0); + ida_free(&hvs->upm_handles, upm_handle); + vc4_state->upm_handle[i] = 0; + return ret; + } + } + + refcount = &hvs->upm_refcounts[upm_handle]; + vc4_state->dlist[vc4_state->ptr0_offset[i]] |= + VC4_SET_FIELD(refcount->upm.start / HVS_UBM_WORD_SIZE, + SCALER6_PTR0_UPM_BASE) | + VC4_SET_FIELD(vc4_state->upm_handle[i] - 1, + SCALER6_PTR0_UPM_HANDLE) | + VC4_SET_FIELD(vc4_state->upm_buffer_lines, + SCALER6_PTR0_UPM_BUFF_SIZE); + } + + return 0; +} + +static void vc6_plane_free_upm(struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + struct drm_device *drm = state->plane->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_hvs *hvs = vc4->hvs; + unsigned int i; + + WARN_ON_ONCE(vc4->gen < VC4_GEN_6_C); + + for (i = 0; i < DRM_FORMAT_MAX_PLANES; i++) { + unsigned int upm_handle; + + upm_handle = vc4_state->upm_handle[i]; + if (!upm_handle) + continue; + + if (refcount_dec_and_test(&hvs->upm_refcounts[upm_handle].refcount)) + vc4_plane_release_upm_ida(hvs, upm_handle); + vc4_state->upm_handle[i] = 0; + } +} + /* * The colorspace conversion matrices are held in 3 entries in the dlist. 
* Create an array of them, with entries for each full and limited mode, and @@ -834,6 +1125,11 @@ static const u32 colorspace_coeffs[2][DRM_COLOR_ENCODING_MAX][3] = { static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state) { + struct drm_device *dev = state->state->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + + WARN_ON_ONCE(vc4->gen != VC4_GEN_4); + if (!state->fb->format->has_alpha) return VC4_SET_FIELD(SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE); @@ -855,25 +1151,56 @@ static u32 vc4_hvs4_get_alpha_blend_mode(struct drm_plane_state *state) static u32 vc4_hvs5_get_alpha_blend_mode(struct drm_plane_state *state) { - if (!state->fb->format->has_alpha) - return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, - SCALER5_CTL2_ALPHA_MODE); + struct drm_device *dev = state->state->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); - switch (state->pixel_blend_mode) { - case DRM_MODE_BLEND_PIXEL_NONE: - return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, - SCALER5_CTL2_ALPHA_MODE); + WARN_ON_ONCE(vc4->gen != VC4_GEN_5 && vc4->gen != VC4_GEN_6_C && + vc4->gen != VC4_GEN_6_D); + + switch (vc4->gen) { default: - case DRM_MODE_BLEND_PREMULTI: - return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, - SCALER5_CTL2_ALPHA_MODE) | - SCALER5_CTL2_ALPHA_PREMULT; - case DRM_MODE_BLEND_COVERAGE: - return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, - SCALER5_CTL2_ALPHA_MODE); + case VC4_GEN_5: + case VC4_GEN_6_C: + if (!state->fb->format->has_alpha) + return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, + SCALER5_CTL2_ALPHA_MODE); + + switch (state->pixel_blend_mode) { + case DRM_MODE_BLEND_PIXEL_NONE: + return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_FIXED, + SCALER5_CTL2_ALPHA_MODE); + default: + case DRM_MODE_BLEND_PREMULTI: + return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, + SCALER5_CTL2_ALPHA_MODE) | + SCALER5_CTL2_ALPHA_PREMULT; + case DRM_MODE_BLEND_COVERAGE: + return VC4_SET_FIELD(SCALER5_CTL2_ALPHA_MODE_PIPELINE, + SCALER5_CTL2_ALPHA_MODE); + } + case VC4_GEN_6_D: + /* 2712-D configures fixed alpha mode in CTL0 */ + return state->pixel_blend_mode == DRM_MODE_BLEND_PREMULTI ? + SCALER5_CTL2_ALPHA_PREMULT : 0; } } +static u32 vc4_hvs6_get_alpha_mask_mode(struct drm_plane_state *state) +{ + struct drm_device *dev = state->state->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + + WARN_ON_ONCE(vc4->gen != VC4_GEN_6_C && vc4->gen != VC4_GEN_6_D); + + if (vc4->gen == VC4_GEN_6_D && + (!state->fb->format->has_alpha || + state->pixel_blend_mode == DRM_MODE_BLEND_PIXEL_NONE)) + return VC4_SET_FIELD(SCALER6D_CTL0_ALPHA_MASK_FIXED, + SCALER6_CTL0_ALPHA_MASK); + + return VC4_SET_FIELD(SCALER6_CTL0_ALPHA_MASK_NONE, SCALER6_CTL0_ALPHA_MASK); +} + /* Writes out a full display list for an active plane to the plane's * private dlist state. 
*/ @@ -906,6 +1233,13 @@ static int vc4_plane_mode_set(struct drm_plane *plane, if (ret) return ret; + if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || + !vc4_state->crtc_w || !vc4_state->crtc_h) { + /* 0 source size probably means the plane is offscreen */ + vc4_state->dlist_initialized = 1; + return 0; + } + width = vc4_state->src_w[0] >> 16; height = vc4_state->src_h[0] >> 16; @@ -1363,6 +1697,427 @@ static int vc4_plane_mode_set(struct drm_plane *plane, return 0; } +static u32 vc6_plane_get_csc_mode(struct vc4_plane_state *vc4_state) +{ + struct drm_plane_state *state = &vc4_state->base; + struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); + u32 ret = 0; + + if (vc4_state->is_yuv) { + enum drm_color_encoding color_encoding = state->color_encoding; + enum drm_color_range color_range = state->color_range; + + /* CSC pre-loaded with: + * 0 = BT601 limited range + * 1 = BT709 limited range + * 2 = BT2020 limited range + * 3 = BT601 full range + * 4 = BT709 full range + * 5 = BT2020 full range + */ + if (color_encoding > DRM_COLOR_YCBCR_BT2020) + color_encoding = DRM_COLOR_YCBCR_BT601; + if (color_range > DRM_COLOR_YCBCR_FULL_RANGE) + color_range = DRM_COLOR_YCBCR_LIMITED_RANGE; + + if (vc4->gen == VC4_GEN_6_C) { + ret |= SCALER6C_CTL2_CSC_ENABLE; + ret |= VC4_SET_FIELD(color_encoding + (color_range * 3), + SCALER6C_CTL2_BRCM_CFC_CONTROL); + } else { + ret |= SCALER6D_CTL2_CSC_ENABLE; + ret |= VC4_SET_FIELD(color_encoding + (color_range * 3), + SCALER6D_CTL2_BRCM_CFC_CONTROL); + } + } + + return ret; +} + +static int vc6_plane_mode_set(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct drm_device *drm = plane->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); + struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); + struct drm_framebuffer *fb = state->fb; + const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); + u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); + int num_planes = fb->format->num_planes; + u32 h_subsample = fb->format->hsub; + u32 v_subsample = fb->format->vsub; + bool mix_plane_alpha; + bool covers_screen; + u32 scl0, scl1, pitch0; + u32 tiling, src_x, src_y; + u32 width, height; + u32 hvs_format = format->hvs; + u32 offsets[3] = { 0 }; + unsigned int rotation; + int ret, i; + + if (vc4_state->dlist_initialized) + return 0; + + ret = vc4_plane_setup_clipping_and_scaling(state); + if (ret) + return ret; + + if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || + !vc4_state->crtc_w || !vc4_state->crtc_h) { + /* 0 source size probably means the plane is offscreen. + * 0 destination size is a redundant plane. + */ + vc4_state->dlist_initialized = 1; + return 0; + } + + width = vc4_state->src_w[0] >> 16; + height = vc4_state->src_h[0] >> 16; + + /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB + * and 4:4:4, scl1 should be set to scl0 so both channels of + * the scaler do the same thing. For YUV, the Y plane needs + * to be put in channel 1 and Cb/Cr in channel 0, so we swap + * the scl fields here. + */ + if (num_planes == 1) { + scl0 = vc4_get_scl_field(state, 0); + scl1 = scl0; + } else { + scl0 = vc4_get_scl_field(state, 1); + scl1 = vc4_get_scl_field(state, 0); + } + + rotation = drm_rotation_simplify(state->rotation, + DRM_MODE_ROTATE_0 | + DRM_MODE_REFLECT_X | + DRM_MODE_REFLECT_Y); + + /* We must point to the last line when Y reflection is enabled. 
*/ + src_y = vc4_state->src_y >> 16; + if (rotation & DRM_MODE_REFLECT_Y) + src_y += height - 1; + + src_x = vc4_state->src_x >> 16; + + switch (base_format_mod) { + case DRM_FORMAT_MOD_LINEAR: + tiling = SCALER6_CTL0_ADDR_MODE_LINEAR; + + /* Adjust the base pointer to the first pixel to be scanned + * out. + */ + for (i = 0; i < num_planes; i++) { + offsets[i] += src_y / (i ? v_subsample : 1) * fb->pitches[i]; + offsets[i] += src_x / (i ? h_subsample : 1) * fb->format->cpp[i]; + } + + break; + + case DRM_FORMAT_MOD_BROADCOM_SAND128: + case DRM_FORMAT_MOD_BROADCOM_SAND256: { + uint32_t param = fourcc_mod_broadcom_param(fb->modifier); + u32 components_per_word; + u32 starting_offset; + u32 fetch_count; + + if (param > SCALER_TILE_HEIGHT_MASK) { + DRM_DEBUG_KMS("SAND height too large (%d)\n", + param); + return -EINVAL; + } + + if (fb->format->format == DRM_FORMAT_P030) { + hvs_format = HVS_PIXEL_FORMAT_YCBCR_10BIT; + tiling = SCALER6_CTL0_ADDR_MODE_128B; + } else { + hvs_format = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE; + + switch (base_format_mod) { + case DRM_FORMAT_MOD_BROADCOM_SAND128: + tiling = SCALER6_CTL0_ADDR_MODE_128B; + break; + case DRM_FORMAT_MOD_BROADCOM_SAND256: + tiling = SCALER6_CTL0_ADDR_MODE_256B; + break; + default: + return -EINVAL; + } + } + + /* Adjust the base pointer to the first pixel to be scanned + * out. + * + * For P030, y_ptr [31:4] is the 128bit word for the start pixel + * y_ptr [3:0] is the pixel (0-11) contained within that 128bit + * word that should be taken as the first pixel. + * Ditto uv_ptr [31:4] vs [3:0], however [3:0] contains the + * element within the 128bit word, eg for pixel 3 the value + * should be 6. + */ + for (i = 0; i < num_planes; i++) { + u32 tile_w, tile, x_off, pix_per_tile; + + if (fb->format->format == DRM_FORMAT_P030) { + /* + * Spec says: bits [31:4] of the given address + * should point to the 128-bit word containing + * the desired starting pixel, and bits[3:0] + * should be between 0 and 11, indicating which + * of the 12-pixels in that 128-bit word is the + * first pixel to be used + */ + u32 remaining_pixels = src_x % 96; + u32 aligned = remaining_pixels / 12; + u32 last_bits = remaining_pixels % 12; + + x_off = aligned * 16 + last_bits; + tile_w = 128; + pix_per_tile = 96; + } else { + switch (base_format_mod) { + case DRM_FORMAT_MOD_BROADCOM_SAND128: + tile_w = 128; + break; + case DRM_FORMAT_MOD_BROADCOM_SAND256: + tile_w = 256; + break; + default: + return -EINVAL; + } + pix_per_tile = tile_w / fb->format->cpp[0]; + x_off = (src_x % pix_per_tile) / + (i ? h_subsample : 1) * + fb->format->cpp[i]; + } + + tile = src_x / pix_per_tile; + + offsets[i] += param * tile_w * tile; + offsets[i] += src_y / (i ? v_subsample : 1) * tile_w; + offsets[i] += x_off & ~(i ? 1 : 0); + } + + components_per_word = fb->format->format == DRM_FORMAT_P030 ? 24 : 32; + starting_offset = src_x % components_per_word; + fetch_count = (width + starting_offset + components_per_word - 1) / + components_per_word; + + pitch0 = VC4_SET_FIELD(param, SCALER6_PTR2_PITCH) | + VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT); + break; + } + + default: + DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", + (long long)fb->modifier); + return -EINVAL; + } + + /* fetch an extra pixel if we don't actually line up with the left edge. 
*/ + if ((vc4_state->src_x & 0xffff) && vc4_state->src_x < (state->fb->width << 16)) + width++; + + /* same for the right side */ + if (((vc4_state->src_x + vc4_state->src_w[0]) & 0xffff) && + vc4_state->src_x + vc4_state->src_w[0] < (state->fb->width << 16)) + width++; + + /* now for the top */ + if ((vc4_state->src_y & 0xffff) && vc4_state->src_y < (state->fb->height << 16)) + height++; + + /* and the bottom */ + if (((vc4_state->src_y + vc4_state->src_h[0]) & 0xffff) && + vc4_state->src_y + vc4_state->src_h[0] < (state->fb->height << 16)) + height++; + + /* for YUV444 hardware wants double the width, otherwise it doesn't + * fetch full width of chroma + */ + if (format->drm == DRM_FORMAT_YUV444 || format->drm == DRM_FORMAT_YVU444) + width <<= 1; + + /* Don't waste cycles mixing with plane alpha if the set alpha + * is opaque or there is no per-pixel alpha information. + * In any case we use the alpha property value as the fixed alpha. + */ + mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && + fb->format->has_alpha; + + /* Control Word 0: Scaling Configuration & Element Validity*/ + vc4_dlist_write(vc4_state, + SCALER6_CTL0_VALID | + VC4_SET_FIELD(tiling, SCALER6_CTL0_ADDR_MODE) | + vc4_hvs6_get_alpha_mask_mode(state) | + (vc4_state->is_unity ? SCALER6_CTL0_UNITY : 0) | + VC4_SET_FIELD(format->pixel_order_hvs5, SCALER6_CTL0_ORDERRGBA) | + VC4_SET_FIELD(scl1, SCALER6_CTL0_SCL1_MODE) | + VC4_SET_FIELD(scl0, SCALER6_CTL0_SCL0_MODE) | + VC4_SET_FIELD(hvs_format, SCALER6_CTL0_PIXEL_FORMAT)); + + /* Position Word 0: Image Position */ + vc4_state->pos0_offset = vc4_state->dlist_count; + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(vc4_state->crtc_y, SCALER6_POS0_START_Y) | + (rotation & DRM_MODE_REFLECT_X ? SCALER6_POS0_HFLIP : 0) | + VC4_SET_FIELD(vc4_state->crtc_x, SCALER6_POS0_START_X)); + + /* Control Word 2: Alpha Value & CSC */ + vc4_dlist_write(vc4_state, + vc6_plane_get_csc_mode(vc4_state) | + vc4_hvs5_get_alpha_blend_mode(state) | + (mix_plane_alpha ? SCALER6_CTL2_ALPHA_MIX : 0) | + VC4_SET_FIELD(state->alpha >> 4, SCALER5_CTL2_ALPHA)); + + /* Position Word 1: Scaled Image Dimensions */ + if (!vc4_state->is_unity) + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(vc4_state->crtc_h - 1, + SCALER6_POS1_SCL_LINES) | + VC4_SET_FIELD(vc4_state->crtc_w - 1, + SCALER6_POS1_SCL_WIDTH)); + + /* Position Word 2: Source Image Size */ + vc4_state->pos2_offset = vc4_state->dlist_count; + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(height - 1, + SCALER6_POS2_SRC_LINES) | + VC4_SET_FIELD(width - 1, + SCALER6_POS2_SRC_WIDTH)); + + /* Position Word 3: Context */ + vc4_dlist_write(vc4_state, 0xc0c0c0c0); + + /* + * TODO: This only covers Raster Scan Order planes + */ + for (i = 0; i < num_planes; i++) { + struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, i); + dma_addr_t paddr = bo->dma_addr + fb->offsets[i] + offsets[i]; + + /* Pointer Word 0 */ + vc4_state->ptr0_offset[i] = vc4_state->dlist_count; + vc4_dlist_write(vc4_state, + (rotation & DRM_MODE_REFLECT_Y ? SCALER6_PTR0_VFLIP : 0) | + /* + * The UPM buffer will be allocated in + * vc6_plane_allocate_upm(). 
+ */ + VC4_SET_FIELD(upper_32_bits(paddr) & 0xff, + SCALER6_PTR0_UPPER_ADDR)); + + /* Pointer Word 1 */ + vc4_dlist_write(vc4_state, lower_32_bits(paddr)); + + /* Pointer Word 2 */ + if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 && + base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) { + vc4_dlist_write(vc4_state, + VC4_SET_FIELD(fb->pitches[i], + SCALER6_PTR2_PITCH)); + } else { + vc4_dlist_write(vc4_state, pitch0); + } + } + + /* + * Palette Word 0 + * TODO: We're not using the palette mode + */ + + /* + * Trans Word 0 + * TODO: It's only relevant if we set the trans_rgb bit in the + * control word 0, and we don't at the moment. + */ + + vc4_state->lbm_offset = 0; + + if (!vc4_state->is_unity || fb->format->is_yuv) { + /* + * Reserve a slot for the LBM Base Address. The real value will + * be set when calling vc4_plane_allocate_lbm(). + */ + if (vc4_state->y_scaling[0] != VC4_SCALING_NONE || + vc4_state->y_scaling[1] != VC4_SCALING_NONE) { + vc4_state->lbm_offset = vc4_state->dlist_count; + vc4_dlist_counter_increment(vc4_state); + } + + if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || + vc4_state->x_scaling[1] != VC4_SCALING_NONE || + vc4_state->y_scaling[0] != VC4_SCALING_NONE || + vc4_state->y_scaling[1] != VC4_SCALING_NONE) { + if (num_planes > 1) + /* + * Emit Cb/Cr as channel 0 and Y as channel + * 1. This matches how we set up scl0/scl1 + * above. + */ + vc4_write_scaling_parameters(state, 1); + + vc4_write_scaling_parameters(state, 0); + } + + /* + * If any PPF setup was done, then all the kernel + * pointers get uploaded. + */ + if (vc4_state->x_scaling[0] == VC4_SCALING_PPF || + vc4_state->y_scaling[0] == VC4_SCALING_PPF || + vc4_state->x_scaling[1] == VC4_SCALING_PPF || + vc4_state->y_scaling[1] == VC4_SCALING_PPF) { + u32 kernel = + VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start, + SCALER_PPF_KERNEL_OFFSET); + + /* HPPF plane 0 */ + vc4_dlist_write(vc4_state, kernel); + /* VPPF plane 0 */ + vc4_dlist_write(vc4_state, kernel); + /* HPPF plane 1 */ + vc4_dlist_write(vc4_state, kernel); + /* VPPF plane 1 */ + vc4_dlist_write(vc4_state, kernel); + } + } + + vc4_dlist_write(vc4_state, SCALER6_CTL0_END); + + vc4_state->dlist[0] |= + VC4_SET_FIELD(vc4_state->dlist_count, SCALER6_CTL0_NEXT); + + /* crtc_* are already clipped coordinates. */ + covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 && + vc4_state->crtc_w == state->crtc->mode.hdisplay && + vc4_state->crtc_h == state->crtc->mode.vdisplay; + + /* + * Background fill might be necessary when the plane has per-pixel + * alpha content or a non-opaque plane alpha and could blend from the + * background or does not cover the entire screen. + */ + vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || + state->alpha != DRM_BLEND_ALPHA_OPAQUE; + + /* + * Flag the dlist as initialized to avoid checking it twice in case + * the async update check already called vc4_plane_mode_set() and + * decided to fallback to sync update because async update was not + * possible. + */ + vc4_state->dlist_initialized = 1; + + vc4_plane_calc_load(state); + + drm_dbg_driver(drm, "[PLANE:%d:%s] Computed DLIST size: %u\n", + plane->base.id, plane->name, vc4_state->dlist_count); + + return 0; +} + /* If a modeset involves changing the setup of a plane, the atomic * infrastructure will call this to validate a proposed plane setup. 
* However, if a plane isn't getting updated, this (and the @@ -1373,6 +2128,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, static int vc4_plane_atomic_check(struct drm_plane *plane, struct drm_atomic_state *state) { + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); struct vc4_plane_state *vc4_state = to_vc4_plane_state(new_plane_state); @@ -1380,17 +2136,38 @@ static int vc4_plane_atomic_check(struct drm_plane *plane, vc4_state->dlist_count = 0; - if (!plane_enabled(new_plane_state)) + if (!plane_enabled(new_plane_state)) { + struct drm_plane_state *old_plane_state = + drm_atomic_get_old_plane_state(state, plane); + + if (vc4->gen >= VC4_GEN_6_C && old_plane_state && + plane_enabled(old_plane_state)) { + vc6_plane_free_upm(new_plane_state); + } return 0; + } - ret = vc4_plane_mode_set(plane, new_plane_state); + if (vc4->gen >= VC4_GEN_6_C) + ret = vc6_plane_mode_set(plane, new_plane_state); + else + ret = vc4_plane_mode_set(plane, new_plane_state); if (ret) return ret; + if (!vc4_state->src_w[0] || !vc4_state->src_h[0] || + !vc4_state->crtc_w || !vc4_state->crtc_h) + return 0; + ret = vc4_plane_allocate_lbm(new_plane_state); if (ret) return ret; + if (vc4->gen >= VC4_GEN_6_C) { + ret = vc6_plane_allocate_upm(new_plane_state); + if (ret) + return ret; + } + return 0; } @@ -1439,7 +2216,8 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0); - uint32_t addr; + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); + dma_addr_t dma_addr = bo->dma_addr + fb->offsets[0]; int idx; if (!drm_dev_enter(plane->dev, &idx)) @@ -1449,19 +2227,38 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) * because this is only called on the primary plane. */ WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); - addr = bo->dma_addr + fb->offsets[0]; - /* Write the new address into the hardware immediately. The - * scanout will start from this address as soon as the FIFO - * needs to refill with pixels. - */ - writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]); + if (vc4->gen == VC4_GEN_6_C) { + u32 value; - /* Also update the CPU-side dlist copy, so that any later - * atomic updates that don't do a new modeset on our plane - * also use our updated address. - */ - vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr; + value = vc4_state->dlist[vc4_state->ptr0_offset[0]] & + ~SCALER6_PTR0_UPPER_ADDR_MASK; + value |= VC4_SET_FIELD(upper_32_bits(dma_addr) & 0xff, + SCALER6_PTR0_UPPER_ADDR); + + writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]); + vc4_state->dlist[vc4_state->ptr0_offset[0]] = value; + + value = lower_32_bits(dma_addr); + writel(value, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0] + 1]); + vc4_state->dlist[vc4_state->ptr0_offset[0] + 1] = value; + } else { + u32 addr; + + addr = (u32)dma_addr; + + /* Write the new address into the hardware immediately. The + * scanout will start from this address as soon as the FIFO + * needs to refill with pixels. + */ + writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset[0]]); + + /* Also update the CPU-side dlist copy, so that any later + * atomic updates that don't do a new modeset on our plane + * also use our updated address. 
+ */ + vc4_state->dlist[vc4_state->ptr0_offset[0]] = addr; + } drm_dev_exit(idx); } @@ -1543,13 +2340,17 @@ static void vc4_plane_atomic_async_update(struct drm_plane *plane, static int vc4_plane_atomic_async_check(struct drm_plane *plane, struct drm_atomic_state *state) { + struct vc4_dev *vc4 = to_vc4_dev(plane->dev); struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state, plane); struct vc4_plane_state *old_vc4_state, *new_vc4_state; int ret; u32 i; - ret = vc4_plane_mode_set(plane, new_plane_state); + if (vc4->gen <= VC4_GEN_5) + ret = vc4_plane_mode_set(plane, new_plane_state); + else + ret = vc6_plane_mode_set(plane, new_plane_state); if (ret) return ret; @@ -1723,7 +2524,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, }; for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { - if (!hvs_formats[i].hvs5_only || vc4->gen == VC4_GEN_5) { + if (!hvs_formats[i].hvs5_only || vc4->gen >= VC4_GEN_5) { formats[num_formats] = hvs_formats[i].drm; num_formats++; } @@ -1738,7 +2539,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, return ERR_CAST(vc4_plane); plane = &vc4_plane->base; - if (vc4->gen == VC4_GEN_5) + if (vc4->gen >= VC4_GEN_5) drm_plane_helper_add(plane, &vc5_plane_helper_funcs); else drm_plane_helper_add(plane, &vc4_plane_helper_funcs); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index c55dec383929..27158be19952 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -19,6 +19,20 @@ #define VC4_GET_FIELD(word, field) FIELD_GET(field##_MASK, word) +#define VC6_SET_FIELD(value, field) \ + ({ \ + WARN_ON(!FIELD_FIT(hvs->vc4->gen == VC4_GEN_6_C ? \ + SCALER6_ ## field ## _MASK : \ + SCALER6D_ ## field ## _MASK, value));\ + FIELD_PREP(hvs->vc4->gen == VC4_GEN_6_C ? \ + SCALER6_ ## field ## _MASK : \ + SCALER6D_ ## field ## _MASK, value); \ + }) + +#define VC6_GET_FIELD(word, field) FIELD_GET(hvs->vc4->gen == VC4_GEN_6_C ? 
\ + SCALER6_ ## field ## _MASK : \ + SCALER6D_ ## field ## _MASK, word) + #define V3D_IDENT0 0x00000 # define V3D_EXPECTED_IDENT0 \ ((2 << 24) | \ @@ -155,6 +169,7 @@ # define PV_CONTROL_EN BIT(0) #define PV_V_CONTROL 0x04 +# define PV_VCONTROL_ODD_TIMING BIT(29) # define PV_VCONTROL_ODD_DELAY_MASK VC4_MASK(22, 6) # define PV_VCONTROL_ODD_DELAY_SHIFT 6 # define PV_VCONTROL_ODD_FIRST BIT(5) @@ -215,6 +230,11 @@ # define PV_MUX_CFG_RGB_PIXEL_MUX_MODE_SHIFT 2 # define PV_MUX_CFG_RGB_PIXEL_MUX_MODE_NO_SWAP 8 +#define PV_PIPE_INIT_CTRL 0x94 +# define PV_PIPE_INIT_CTRL_PV_INIT_WIDTH_MASK VC4_MASK(11, 8) +# define PV_PIPE_INIT_CTRL_PV_INIT_IDLE_MASK VC4_MASK(7, 4) +# define PV_PIPE_INIT_CTRL_PV_INIT_EN BIT(0) + #define SCALER_CHANNELS_COUNT 3 #define SCALER_DISPCTRL 0x00000000 @@ -418,6 +438,10 @@ # define SCALER_DISPSTAT1_FRCNT0_SHIFT 18 # define SCALER_DISPSTAT1_FRCNT1_MASK VC4_MASK(17, 12) # define SCALER_DISPSTAT1_FRCNT1_SHIFT 12 +# define SCALER5_DISPSTAT1_FRCNT0_MASK VC4_MASK(25, 20) +# define SCALER5_DISPSTAT1_FRCNT0_SHIFT 20 +# define SCALER5_DISPSTAT1_FRCNT1_MASK VC4_MASK(19, 14) +# define SCALER5_DISPSTAT1_FRCNT1_SHIFT 14 #define SCALER_DISPSTATX(x) (SCALER_DISPSTAT0 + \ (x) * (SCALER_DISPSTAT1 - \ @@ -436,6 +460,8 @@ #define SCALER_DISPSTAT2 0x00000068 # define SCALER_DISPSTAT2_FRCNT2_MASK VC4_MASK(17, 12) # define SCALER_DISPSTAT2_FRCNT2_SHIFT 12 +# define SCALER5_DISPSTAT2_FRCNT2_MASK VC4_MASK(19, 14) +# define SCALER5_DISPSTAT2_FRCNT2_SHIFT 14 #define SCALER_DISPBASE2 0x0000006c #define SCALER_DISPALPHA2 0x00000070 @@ -514,6 +540,206 @@ #define SCALER5_DLIST_START 0x00004000 +#define SCALER6_VERSION 0x00000000 +# define SCALER6_VERSION_MASK VC4_MASK(7, 0) +# define SCALER6_VERSION_C0 0x00000053 +# define SCALER6_VERSION_D0 0x00000054 +#define SCALER6_CXM_SIZE 0x00000004 +#define SCALER6_LBM_SIZE 0x00000008 +#define SCALER6_UBM_SIZE 0x0000000c +#define SCALER6_COBA_SIZE 0x00000010 +#define SCALER6_COB_SIZE 0x00000014 + +#define SCALER6_CONTROL 0x00000020 +# define SCALER6_CONTROL_HVS_EN BIT(31) +# define SCALER6_CONTROL_PF_LINES_MASK VC4_MASK(22, 18) +# define SCALER6_CONTROL_ABORT_ON_EMPTY BIT(16) +# define SCALER6_CONTROL_DSP1_TARGET_MASK VC4_MASK(13, 12) +# define SCALER6_CONTROL_MAX_REQS_MASK VC4_MASK(7, 4) + +#define SCALER6_FETCHER_STATUS 0x00000024 +#define SCALER6_FETCH_STATUS 0x00000028 +#define SCALER6_HANDLE_ERROR 0x0000002c + +#define SCALER6_DISP0_CTRL0 0x00000030 +#define SCALER6_DISPX_CTRL0(x) ((hvs->vc4->gen == VC4_GEN_6_C) ? \ + (SCALER6_DISP0_CTRL0 + ((x) * (SCALER6_DISP1_CTRL0 - SCALER6_DISP0_CTRL0))) : \ + (SCALER6D_DISP0_CTRL0 + ((x) * (SCALER6D_DISP1_CTRL0 - SCALER6D_DISP0_CTRL0)))) +# define SCALER6_DISPX_CTRL0_ENB BIT(31) +# define SCALER6_DISPX_CTRL0_RESET BIT(30) +# define SCALER6_DISPX_CTRL0_FWIDTH_MASK VC4_MASK(28, 16) +# define SCALER6_DISPX_CTRL0_ONESHOT BIT(15) +# define SCALER6_DISPX_CTRL0_ONECTX_MASK VC4_MASK(14, 13) +# define SCALER6_DISPX_CTRL0_LINES_MASK VC4_MASK(12, 0) + +#define SCALER6_DISP0_CTRL1 0x00000034 +#define SCALER6_DISPX_CTRL1(x) ((hvs->vc4->gen == VC4_GEN_6_C) ? \ + (SCALER6_DISP0_CTRL1 + ((x) * (SCALER6_DISP1_CTRL1 - SCALER6_DISP0_CTRL1))) : \ + (SCALER6D_DISP0_CTRL1 + ((x) * (SCALER6D_DISP1_CTRL1 - SCALER6D_DISP0_CTRL1)))) +# define SCALER6_DISPX_CTRL1_BGENB BIT(8) +# define SCALER6_DISPX_CTRL1_INTLACE BIT(0) + +#define SCALER6_DISP0_BGND 0x00000038 +#define SCALER6_DISPX_BGND(x) ((hvs->vc4->gen == VC4_GEN_6_C) ? 
\ + (SCALER6_DISP0_BGND + ((x) * (SCALER6_DISP1_BGND - SCALER6_DISP0_BGND))) : \ + (SCALER6D_DISP0_BGND + ((x) * (SCALER6D_DISP1_BGND - SCALER6D_DISP0_BGND)))) + +#define SCALER6_DISP0_LPTRS 0x0000003c +#define SCALER6_DISPX_LPTRS(x) ((hvs->vc4->gen == VC4_GEN_6_C) ? \ + (SCALER6_DISP0_LPTRS + ((x) * (SCALER6_DISP1_LPTRS - SCALER6_DISP0_LPTRS))) : \ + (SCALER6D_DISP0_LPTRS + ((x) * (SCALER6D_DISP1_LPTRS - SCALER6D_DISP0_LPTRS)))) +# define SCALER6_DISPX_LPTRS_HEADE_MASK VC4_MASK(11, 0) + +#define SCALER6_DISP0_COB 0x00000040 +#define SCALER6_DISPX_COB(x) ((hvs->vc4->gen == VC4_GEN_6_C) ? \ + (SCALER6_DISP0_COB + ((x) * (SCALER6_DISP1_COB - SCALER6_DISP0_COB))) : \ + (SCALER6D_DISP0_COB + ((x) * (SCALER6D_DISP1_COB - SCALER6D_DISP0_COB)))) +# define SCALER6_DISPX_COB_TOP_MASK VC4_MASK(31, 16) +# define SCALER6_DISPX_COB_BASE_MASK VC4_MASK(15, 0) + +#define SCALER6_DISP0_STATUS 0x00000044 +#define SCALER6_DISPX_STATUS(x) ((hvs->vc4->gen == VC4_GEN_6_C) ? \ + (SCALER6_DISP0_STATUS + ((x) * (SCALER6_DISP1_STATUS - SCALER6_DISP0_STATUS))) : \ + (SCALER6D_DISP0_STATUS + ((x) * (SCALER6D_DISP1_STATUS - SCALER6D_DISP0_STATUS)))) +# define SCALER6_DISPX_STATUS_EMPTY BIT(22) +# define SCALER6_DISPX_STATUS_FRCNT_MASK VC4_MASK(21, 16) +# define SCALER6_DISPX_STATUS_OFIELD BIT(15) +# define SCALER6_DISPX_STATUS_MODE_MASK VC4_MASK(14, 13) +# define SCALER6_DISPX_STATUS_MODE_DISABLED 0 +# define SCALER6_DISPX_STATUS_MODE_INIT 1 +# define SCALER6_DISPX_STATUS_MODE_RUN 2 +# define SCALER6_DISPX_STATUS_MODE_EOF 3 +# define SCALER6_DISPX_STATUS_YLINE_MASK VC4_MASK(12, 0) + +#define SCALER6_DISP0_DL 0x00000048 + +#define SCALER6_DISPX_DL(x) ((hvs->vc4->gen == VC4_GEN_6_C) ? \ + (SCALER6_DISP0_DL + ((x) * (SCALER6_DISP1_DL - SCALER6_DISP0_DL))) : \ + (SCALER6D_DISP0_DL + ((x) * (SCALER6D_DISP1_DL - SCALER6D_DISP0_DL)))) +# define SCALER6_DISPX_DL_LACT_MASK VC4_MASK(11, 0) + +#define SCALER6_DISP0_RUN 0x0000004c +#define SCALER6_DISP1_CTRL0 0x00000050 +#define SCALER6_DISP1_CTRL1 0x00000054 +#define SCALER6_DISP1_BGND 0x00000058 +#define SCALER6_DISP1_LPTRS 0x0000005c +#define SCALER6_DISP1_COB 0x00000060 +#define SCALER6_DISP1_STATUS 0x00000064 +#define SCALER6_DISP1_DL 0x00000068 +#define SCALER6_DISP1_RUN 0x0000006c +#define SCALER6_DISP2_CTRL0 0x00000070 +#define SCALER6_DISP2_CTRL1 0x00000074 +#define SCALER6_DISP2_BGND 0x00000078 +#define SCALER6_DISP2_LPTRS 0x0000007c +#define SCALER6_DISP2_COB 0x00000080 +#define SCALER6_DISP2_STATUS 0x00000084 +#define SCALER6_DISP2_DL 0x00000088 +#define SCALER6_DISP2_RUN 0x0000008c +#define SCALER6_EOLN 0x00000090 +#define SCALER6_DL_STATUS 0x00000094 +#define SCALER6_BFG_MISC 0x0000009c +#define SCALER6_QOS0 0x000000a0 +#define SCALER6_PROF0 0x000000a4 +#define SCALER6_QOS1 0x000000a8 +#define SCALER6_PROF1 0x000000ac +#define SCALER6_QOS2 0x000000b0 +#define SCALER6_PROF2 0x000000b4 +#define SCALER6_PRI_MAP0 0x000000b8 +#define SCALER6_PRI_MAP1 0x000000bc +#define SCALER6_HISTCTRL 0x000000c0 +#define SCALER6_HISTBIN0 0x000000c4 +#define SCALER6_HISTBIN1 0x000000c8 +#define SCALER6_HISTBIN2 0x000000cc +#define SCALER6_HISTBIN3 0x000000d0 +#define SCALER6_HISTBIN4 0x000000d4 +#define SCALER6_HISTBIN5 0x000000d8 +#define SCALER6_HISTBIN6 0x000000dc +#define SCALER6_HISTBIN7 0x000000e0 +#define SCALER6_HDR_CFG_REMAP 0x000000f4 +#define SCALER6_COL_SPACE 0x000000f8 +#define SCALER6_HVS_ID 0x000000fc +#define SCALER6_CFC1 0x00000100 +#define SCALER6_DISP_UPM_ISO0 0x00000200 +#define SCALER6_DISP_UPM_ISO1 0x00000204 +#define SCALER6_DISP_UPM_ISO2 0x00000208 +#define 
SCALER6_DISP_LBM_ISO0 0x0000020c +#define SCALER6_DISP_LBM_ISO1 0x00000210 +#define SCALER6_DISP_LBM_ISO2 0x00000214 +#define SCALER6_DISP_COB_ISO0 0x00000218 +#define SCALER6_DISP_COB_ISO1 0x0000021c +#define SCALER6_DISP_COB_ISO2 0x00000220 +#define SCALER6_BAD_COB 0x00000224 +#define SCALER6_BAD_LBM 0x00000228 +#define SCALER6_BAD_UPM 0x0000022c +#define SCALER6_BAD_AXI 0x00000230 + +#define SCALER6D_VERSION 0x00000000 +#define SCALER6D_CXM_SIZE 0x00000004 +#define SCALER6D_LBM_SIZE 0x00000008 +#define SCALER6D_UBM_SIZE 0x0000000c +#define SCALER6D_COBA_SIZE 0x00000010 +#define SCALER6D_COB_SIZE 0x00000014 +#define SCALER6D_CONTROL 0x00000020 +#define SCALER6D_FETCHER_STATUS 0x00000024 +#define SCALER6D_FETCH_STATUS 0x00000028 +#define SCALER6D_HANDLE_ERROR 0x0000002c +#define SCALER6D_EOLN 0x00000030 +#define SCALER6D_DL_STATUS 0x00000034 +#define SCALER6D_PRI_MAP0 0x00000038 +#define SCALER6D_PRI_MAP1 0x0000003c +#define SCALER6D_HISTCTRL 0x000000d0 +#define SCALER6D_HISTBIN0 0x000000d4 +#define SCALER6D_HISTBIN1 0x000000d8 +#define SCALER6D_HISTBIN2 0x000000dc +#define SCALER6D_HISTBIN3 0x000000e0 +#define SCALER6D_HISTBIN4 0x000000e4 +#define SCALER6D_HISTBIN5 0x000000e8 +#define SCALER6D_HISTBIN6 0x000000ec +#define SCALER6D_HISTBIN7 0x000000f0 +#define SCALER6D_HVS_ID 0x000000fc + +#define SCALER6D_DISP0_CTRL0 0x00000100 +#define SCALER6D_DISP0_CTRL1 0x00000104 +#define SCALER6D_DISP0_BGND 0x00000108 +#define SCALER6D_DISP0_LPTRS 0x00000110 +#define SCALER6D_DISP0_COB 0x00000114 +#define SCALER6D_DISP0_STATUS 0x00000118 +#define SCALER6D_DISP0_CTRL0 0x00000100 +#define SCALER6D_DISP0_CTRL1 0x00000104 +#define SCALER6D_DISP0_BGND0 0x00000108 +#define SCALER6D_DISP0_BGND1 0x0000010c +#define SCALER6D_DISP0_LPTRS 0x00000110 +#define SCALER6D_DISP0_COB 0x00000114 +#define SCALER6D_DISP0_STATUS 0x00000118 +#define SCALER6D_DISP0_DL 0x0000011c +#define SCALER6D_DISP0_RUN 0x00000120 +#define SCALER6D_QOS0 0x00000124 +#define SCALER6D_PROF0 0x00000128 +#define SCALER6D_DISP1_CTRL0 0x00000140 +#define SCALER6D_DISP1_CTRL1 0x00000144 +#define SCALER6D_DISP1_BGND0 0x00000148 +#define SCALER6D_DISP1_BGND1 0x0000014c +#define SCALER6D_DISP1_LPTRS 0x00000150 +#define SCALER6D_DISP1_COB 0x00000154 +#define SCALER6D_DISP1_STATUS 0x00000158 +#define SCALER6D_DISP1_DL 0x0000015c +#define SCALER6D_DISP1_RUN 0x00000160 +#define SCALER6D_QOS1 0x00000164 +#define SCALER6D_PROF1 0x00000168 +#define SCALER6D_DISP2_CTRL0 0x00000180 +#define SCALER6D_DISP2_CTRL1 0x00000184 +#define SCALER6D_DISP2_BGND0 0x00000188 +#define SCALER6D_DISP2_BGND1 0x0000018c +#define SCALER6D_DISP2_LPTRS 0x00000190 +#define SCALER6D_DISP2_COB 0x00000194 +#define SCALER6D_DISP2_STATUS 0x00000198 +#define SCALER6D_DISP2_DL 0x0000019c +#define SCALER6D_DISP2_RUN 0x000001a0 +#define SCALER6D_QOS2 0x000001a4 +#define SCALER6D_PROF2 0x000001a8 + +#define SCALER6(x) ((hvs->vc4->gen == VC4_GEN_6_C) ? 
SCALER6_ ## x : SCALER6D_ ## x) + # define VC4_HDMI_SW_RESET_FORMAT_DETECT BIT(1) # define VC4_HDMI_SW_RESET_HDMI BIT(0) @@ -761,6 +987,15 @@ enum { # define VC4_HD_MAI_THR_DREQLOW_MASK VC4_MASK(5, 0) # define VC4_HD_MAI_THR_DREQLOW_SHIFT 0 +# define VC6_D_HD_MAI_THR_PANICHIGH_MASK VC4_MASK(29, 23) +# define VC6_D_HD_MAI_THR_PANICHIGH_SHIFT 23 +# define VC6_D_HD_MAI_THR_PANICLOW_MASK VC4_MASK(21, 15) +# define VC6_D_HD_MAI_THR_PANICLOW_SHIFT 15 +# define VC6_D_HD_MAI_THR_DREQHIGH_MASK VC4_MASK(13, 7) +# define VC6_D_HD_MAI_THR_DREQHIGH_SHIFT 7 +# define VC6_D_HD_MAI_THR_DREQLOW_MASK VC4_MASK(6, 0) +# define VC6_D_HD_MAI_THR_DREQLOW_SHIFT 0 + /* Divider from HDMI HSM clock to MAI serial clock. Sampling period * converges to N / (M + 1) cycles. */ @@ -968,6 +1203,9 @@ enum hvs_pixel_format { #define SCALER5_CTL2_ALPHA_MASK VC4_MASK(15, 4) #define SCALER5_CTL2_ALPHA_SHIFT 4 +#define SCALER6D_CTL2_CSC_ENABLE BIT(19) +#define SCALER6D_CTL2_BRCM_CFC_CONTROL_MASK VC4_MASK(22, 20) + #define SCALER_POS1_SCL_HEIGHT_MASK VC4_MASK(27, 16) #define SCALER_POS1_SCL_HEIGHT_SHIFT 16 @@ -1109,4 +1347,63 @@ enum hvs_pixel_format { #define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0) #define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0 +#define SCALER6_CTL0_END BIT(31) +#define SCALER6_CTL0_VALID BIT(30) +#define SCALER6_CTL0_NEXT_MASK VC4_MASK(29, 24) +#define SCALER6_CTL0_RGB_TRANS BIT(23) +#define SCALER6_CTL0_ADDR_MODE_MASK VC4_MASK(22, 20) +#define SCALER6_CTL0_ADDR_MODE_LINEAR 0 +#define SCALER6_CTL0_ADDR_MODE_128B 1 +#define SCALER6_CTL0_ADDR_MODE_256B 2 +#define SCALER6_CTL0_ADDR_MODE_MAP8 3 +#define SCALER6_CTL0_ADDR_MODE_UIF 4 + +#define SCALER6_CTL0_ALPHA_MASK_MASK VC4_MASK(19, 18) +#define SCALER6_CTL0_ALPHA_MASK_NONE 0 +#define SCALER6D_CTL0_ALPHA_MASK_FIXED 3 +#define SCALER6_CTL0_UNITY BIT(15) +#define SCALER6_CTL0_ORDERRGBA_MASK VC4_MASK(14, 13) +#define SCALER6_CTL0_SCL1_MODE_MASK VC4_MASK(10, 8) +#define SCALER6_CTL0_SCL0_MODE_MASK VC4_MASK(7, 5) +#define SCALER6_CTL0_PIXEL_FORMAT_MASK VC4_MASK(4, 0) + +#define SCALER6_POS0_START_Y_MASK VC4_MASK(28, 16) +#define SCALER6_POS0_HFLIP BIT(15) +#define SCALER6_POS0_START_X_MASK VC4_MASK(12, 0) + +#define SCALER6_CTL2_ALPHA_MODE_MASK VC4_MASK(31, 30) +#define SCALER6_CTL2_ALPHA_PREMULT BIT(29) +#define SCALER6_CTL2_ALPHA_MIX BIT(28) +#define SCALER6_CTL2_BFG BIT(26) +#define SCALER6C_CTL2_CSC_ENABLE BIT(25) +#define SCALER6C_CTL2_BRCM_CFC_CONTROL_MASK VC4_MASK(18, 16) +#define SCALER6_CTL2_ALPHA_MASK VC4_MASK(15, 4) + +#define SCALER6_POS1_SCL_LINES_MASK VC4_MASK(28, 16) +#define SCALER6_POS1_SCL_WIDTH_MASK VC4_MASK(12, 0) + +#define SCALER6_POS2_SRC_LINES_MASK VC4_MASK(28, 16) +#define SCALER6_POS2_SRC_WIDTH_MASK VC4_MASK(12, 0) + +#define SCALER6_PTR0_VFLIP BIT(31) +#define SCALER6_PTR0_UPM_BASE_MASK VC4_MASK(28, 16) +#define SCALER6_PTR0_UPM_HANDLE_MASK VC4_MASK(14, 10) +#define SCALER6_PTR0_UPM_BUFF_SIZE_MASK VC4_MASK(9, 8) +#define SCALER6_PTR0_UPM_BUFF_SIZE_16_LINES 3 +#define SCALER6_PTR0_UPM_BUFF_SIZE_8_LINES 2 +#define SCALER6_PTR0_UPM_BUFF_SIZE_4_LINES 1 +#define SCALER6_PTR0_UPM_BUFF_SIZE_2_LINES 0 +#define SCALER6_PTR0_UPPER_ADDR_MASK VC4_MASK(7, 0) + +#define SCALER6_PTR2_ALPHA_BPP_MASK VC4_MASK(31, 31) +#define SCALER6_PTR2_ALPHA_BPP_1BPP 1 +#define SCALER6_PTR2_ALPHA_BPP_8BPP 0 +#define SCALER6_PTR2_ALPHA_ORDER_MASK VC4_MASK(30, 30) +#define SCALER6_PTR2_ALPHA_ORDER_MSB_TO_LSB 1 +#define SCALER6_PTR2_ALPHA_ORDER_LSB_TO_MSB 0 +#define SCALER6_PTR2_ALPHA_OFFS_MASK VC4_MASK(29, 27) +#define SCALER6_PTR2_LSKIP_MASK VC4_MASK(26, 24) +#define 
SCALER6_PTR2_PITCH_MASK VC4_MASK(16, 0) +#define SCALER6_PTR2_FETCH_COUNT_MASK VC4_MASK(26, 16) + #endif /* VC4_REGS_H */ diff --git a/drivers/gpu/drm/vc4/vc4_txp.c b/drivers/gpu/drm/vc4/vc4_txp.c index 3e38a1d2d55e..4eab069cda75 100644 --- a/drivers/gpu/drm/vc4/vc4_txp.c +++ b/drivers/gpu/drm/vc4/vc4_txp.c @@ -145,6 +145,9 @@ /* Number of lines received and committed to memory. */ #define TXP_PROGRESS 0x10 +#define TXP_DST_PTR_HIGH_MOPLET 0x1c +#define TXP_DST_PTR_HIGH_MOP 0x24 + #define TXP_READ(offset) \ ({ \ kunit_fail_current_test("Accessing a register in a unit test!\n"); \ @@ -159,6 +162,7 @@ struct vc4_txp { struct vc4_crtc base; + const struct vc4_txp_data *data; struct platform_device *pdev; @@ -286,9 +290,13 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, struct drm_connector_state *conn_state = drm_atomic_get_new_connector_state(state, conn); struct vc4_txp *txp = connector_to_vc4_txp(conn); + const struct vc4_txp_data *txp_data = txp->data; struct drm_gem_dma_object *gem; struct drm_display_mode *mode; struct drm_framebuffer *fb; + unsigned int hdisplay; + unsigned int vdisplay; + dma_addr_t addr; u32 ctrl; int idx; int i; @@ -308,9 +316,11 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, return; ctrl = TXP_GO | TXP_EI | - VC4_SET_FIELD(0xf, TXP_BYTE_ENABLE) | VC4_SET_FIELD(txp_fmts[i], TXP_FORMAT); + if (txp_data->has_byte_enable) + ctrl |= VC4_SET_FIELD(0xf, TXP_BYTE_ENABLE); + if (fb->format->has_alpha) ctrl |= TXP_ALPHA_ENABLE; else @@ -324,11 +334,25 @@ static void vc4_txp_connector_atomic_commit(struct drm_connector *conn, return; gem = drm_fb_dma_get_gem_obj(fb, 0); - TXP_WRITE(TXP_DST_PTR, gem->dma_addr + fb->offsets[0]); + addr = gem->dma_addr + fb->offsets[0]; + + TXP_WRITE(TXP_DST_PTR, lower_32_bits(addr)); + + if (txp_data->supports_40bit_addresses) + TXP_WRITE(txp_data->high_addr_ptr_reg, upper_32_bits(addr) & 0xff); + TXP_WRITE(TXP_DST_PITCH, fb->pitches[0]); + + hdisplay = mode->hdisplay ?: 1; + vdisplay = mode->vdisplay ?: 1; + if (txp_data->size_minus_one) { + hdisplay -= 1; + vdisplay -= 1; + } + TXP_WRITE(TXP_DIM, - VC4_SET_FIELD(mode->hdisplay, TXP_WIDTH) | - VC4_SET_FIELD(mode->vdisplay, TXP_HEIGHT)); + VC4_SET_FIELD(hdisplay, TXP_WIDTH) | + VC4_SET_FIELD(vdisplay, TXP_HEIGHT)); TXP_WRITE(TXP_DST_CTRL, ctrl); @@ -362,6 +386,7 @@ static const struct drm_connector_funcs vc4_txp_connector_funcs = { static void vc4_txp_encoder_disable(struct drm_encoder *encoder) { struct drm_device *drm = encoder->dev; + struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_txp *txp = encoder_to_vc4_txp(encoder); int idx; @@ -380,7 +405,8 @@ static void vc4_txp_encoder_disable(struct drm_encoder *encoder) WARN_ON(TXP_READ(TXP_DST_CTRL) & TXP_BUSY); } - TXP_WRITE(TXP_DST_CTRL, TXP_POWERDOWN); + if (vc4->gen < VC4_GEN_6_C) + TXP_WRITE(TXP_DST_CTRL, TXP_POWERDOWN); drm_dev_exit(idx); } @@ -484,17 +510,49 @@ static irqreturn_t vc4_txp_interrupt(int irq, void *data) return IRQ_HANDLED; } -const struct vc4_crtc_data vc4_txp_crtc_data = { - .name = "txp", - .debugfs_name = "txp_regs", - .hvs_available_channels = BIT(2), - .hvs_output = 2, +static const struct vc4_txp_data bcm2712_mop_data = { + .base = { + .name = "mop", + .debugfs_name = "mop_regs", + .hvs_available_channels = BIT(2), + .hvs_output = 2, + }, + .encoder_type = VC4_ENCODER_TYPE_TXP0, + .high_addr_ptr_reg = TXP_DST_PTR_HIGH_MOP, + .has_byte_enable = true, + .size_minus_one = true, + .supports_40bit_addresses = true, +}; + +static const struct vc4_txp_data 
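The BCM2712 MOP/MOPLET write-back path above splits a 40-bit destination address between TXP_DST_PTR (low 32 bits) and the variant-specific high-address register (bits 39:32), and programs the dimensions as size-minus-one. A stand-alone sketch of that arithmetic, with a made-up address and mode:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t addr = 0x123456789aULL;		/* made-up 40-bit DMA address */
	uint32_t lo = (uint32_t)addr;			/* lower_32_bits(addr) */
	uint32_t hi = (uint32_t)(addr >> 32) & 0xff;	/* upper_32_bits(addr) & 0xff */
	unsigned int hdisplay = 1920, vdisplay = 1080;

	/* On MOP/MOPLET the TXP_DIM fields hold "size minus one". */
	printf("DST_PTR=0x%08x DST_PTR_HIGH=0x%02x DIM=%ux%u\n",
	       lo, hi, hdisplay - 1, vdisplay - 1);
	return 0;
}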
bcm2712_moplet_data = { + .base = { + .name = "moplet", + .debugfs_name = "moplet_regs", + .hvs_available_channels = BIT(1), + .hvs_output = 4, + }, + .encoder_type = VC4_ENCODER_TYPE_TXP1, + .high_addr_ptr_reg = TXP_DST_PTR_HIGH_MOPLET, + .size_minus_one = true, + .supports_40bit_addresses = true, +}; + +const struct vc4_txp_data bcm2835_txp_data = { + .base = { + .name = "txp", + .debugfs_name = "txp_regs", + .hvs_available_channels = BIT(2), + .hvs_output = 2, + }, + .encoder_type = VC4_ENCODER_TYPE_TXP0, + .has_byte_enable = true, }; static int vc4_txp_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm = dev_get_drvdata(master); + const struct vc4_txp_data *txp_data; struct vc4_encoder *vc4_encoder; struct drm_encoder *encoder; struct vc4_crtc *vc4_crtc; @@ -509,6 +567,11 @@ static int vc4_txp_bind(struct device *dev, struct device *master, void *data) if (!txp) return -ENOMEM; + txp_data = of_device_get_match_data(dev); + if (!txp_data) + return -ENODEV; + + txp->data = txp_data; txp->pdev = pdev; txp->regs = vc4_ioremap_regs(pdev, 0); if (IS_ERR(txp->regs)) @@ -519,13 +582,13 @@ static int vc4_txp_bind(struct device *dev, struct device *master, void *data) vc4_crtc->regset.regs = txp_regs; vc4_crtc->regset.nregs = ARRAY_SIZE(txp_regs); - ret = vc4_crtc_init(drm, pdev, vc4_crtc, &vc4_txp_crtc_data, + ret = vc4_crtc_init(drm, pdev, vc4_crtc, &txp_data->base, &vc4_txp_crtc_funcs, &vc4_txp_crtc_helper_funcs, true); if (ret) return ret; vc4_encoder = &txp->encoder; - txp->encoder.type = VC4_ENCODER_TYPE_TXP; + txp->encoder.type = txp_data->encoder_type; encoder = &vc4_encoder->base; encoder->possible_crtcs = drm_crtc_mask(&vc4_crtc->base); @@ -579,7 +642,9 @@ static void vc4_txp_remove(struct platform_device *pdev) } static const struct of_device_id vc4_txp_dt_match[] = { - { .compatible = "brcm,bcm2835-txp" }, + { .compatible = "brcm,bcm2712-mop", .data = &bcm2712_mop_data }, + { .compatible = "brcm,bcm2712-moplet", .data = &bcm2712_moplet_data }, + { .compatible = "brcm,bcm2835-txp", .data = &bcm2835_txp_data }, { /* sentinel */ }, }; diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index c5e3e5457737..2752ab4f1c97 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -47,7 +47,6 @@ #define DRIVER_NAME "vgem" #define DRIVER_DESC "Virtual GEM provider" -#define DRIVER_DATE "20120112" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -121,7 +120,6 @@ static const struct drm_driver vgem_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, }; diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c index ffca6e2e1c9a..6a67c6297d58 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.c +++ b/drivers/gpu/drm/virtio/virtgpu_drv.c @@ -32,9 +32,9 @@ #include <linux/poll.h> #include <linux/wait.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_file.h> @@ -184,7 +184,6 @@ static const struct drm_driver driver = { .postclose = virtio_gpu_driver_postclose, .dumb_create = virtio_gpu_mode_dumb_create, - .dumb_map_offset = virtio_gpu_mode_dumb_mmap, DRM_FBDEV_SHMEM_DRIVER_OPS, @@ -202,7 +201,6 @@ static const struct drm_driver driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = 
DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h index 64c236169db8..f42ca9d8ed10 100644 --- a/drivers/gpu/drm/virtio/virtgpu_drv.h +++ b/drivers/gpu/drm/virtio/virtgpu_drv.h @@ -45,7 +45,6 @@ #define DRIVER_NAME "virtio_gpu" #define DRIVER_DESC "virtio GPU" -#define DRIVER_DATE "0" #define DRIVER_MAJOR 0 #define DRIVER_MINOR 1 @@ -89,9 +88,11 @@ struct virtio_gpu_object_params { struct virtio_gpu_object { struct drm_gem_shmem_object base; + struct sg_table *sgt; uint32_t hw_res_handle; bool dumb; bool created; + bool attached; bool host3d_blob, guest_blob; uint32_t blob_mem, blob_flags; @@ -194,6 +195,13 @@ struct virtio_gpu_framebuffer { #define to_virtio_gpu_framebuffer(x) \ container_of(x, struct virtio_gpu_framebuffer, base) +struct virtio_gpu_plane_state { + struct drm_plane_state base; + struct virtio_gpu_fence *fence; +}; +#define to_virtio_gpu_plane_state(x) \ + container_of(x, struct virtio_gpu_plane_state, base) + struct virtio_gpu_queue { struct virtqueue *vq; spinlock_t qlock; @@ -301,9 +309,6 @@ void virtio_gpu_gem_object_close(struct drm_gem_object *obj, int virtio_gpu_mode_dumb_create(struct drm_file *file_priv, struct drm_device *dev, struct drm_mode_create_dumb *args); -int virtio_gpu_mode_dumb_mmap(struct drm_file *file_priv, - struct drm_device *dev, - uint32_t handle, uint64_t *offset_p); struct virtio_gpu_object_array *virtio_gpu_array_alloc(u32 nents); struct virtio_gpu_object_array* @@ -349,6 +354,10 @@ void virtio_gpu_object_attach(struct virtio_gpu_device *vgdev, struct virtio_gpu_object *obj, struct virtio_gpu_mem_entry *ents, unsigned int nents); +void virtio_gpu_object_detach(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *obj, + struct virtio_gpu_fence *fence); +int virtio_gpu_detach_object_fenced(struct virtio_gpu_object *bo); void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev, struct virtio_gpu_output *output); int virtio_gpu_cmd_get_display_info(struct virtio_gpu_device *vgdev); @@ -468,6 +477,10 @@ struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, struct drm_gem_object *virtgpu_gem_prime_import_sg_table( struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sgt); +int virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry **ents, + unsigned int *nents, + struct virtio_gpu_object *bo, + struct dma_buf_attachment *attach); /* virtgpu_debugfs.c */ void virtio_gpu_debugfs_init(struct drm_minor *minor); diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index 7db48d17ee3a..5aab588fc400 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -99,21 +99,6 @@ fail: return ret; } -int virtio_gpu_mode_dumb_mmap(struct drm_file *file_priv, - struct drm_device *dev, - uint32_t handle, uint64_t *offset_p) -{ - struct drm_gem_object *gobj; - - BUG_ON(!offset_p); - gobj = drm_gem_object_lookup(file_priv, handle); - if (gobj == NULL) - return -ENOENT; - *offset_p = drm_vma_node_offset_addr(&gobj->vma_node); - drm_gem_object_put(gobj); - return 0; -} - int virtio_gpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file) { @@ -127,15 +112,17 @@ int virtio_gpu_gem_object_open(struct drm_gem_object *obj, /* the context might still be missing when the first ioctl is * DRM_IOCTL_MODE_CREATE_DUMB or DRM_IOCTL_PRIME_FD_TO_HANDLE */ - virtio_gpu_create_context(obj->dev, file); + if 
(!vgdev->has_context_init) + virtio_gpu_create_context(obj->dev, file); objs = virtio_gpu_array_alloc(1); if (!objs) return -ENOMEM; virtio_gpu_array_add_obj(objs, obj); - virtio_gpu_cmd_context_attach_resource(vgdev, vfpriv->ctx_id, - objs); + if (vfpriv->ctx_id) + virtio_gpu_cmd_context_attach_resource(vgdev, vfpriv->ctx_id, objs); + out_notify: virtio_gpu_notify(vgdev); return 0; diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index e4f76f315550..c33c057365f8 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -80,9 +80,9 @@ static int virtio_gpu_map_ioctl(struct drm_device *dev, void *data, struct virtio_gpu_device *vgdev = dev->dev_private; struct drm_virtgpu_map *virtio_gpu_map = data; - return virtio_gpu_mode_dumb_mmap(file, vgdev->ddev, - virtio_gpu_map->handle, - &virtio_gpu_map->offset); + return drm_gem_dumb_map_offset(file, vgdev->ddev, + virtio_gpu_map->handle, + &virtio_gpu_map->offset); } static int virtio_gpu_getparam_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index c7e74cf13022..5517cff8715c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -80,6 +80,9 @@ void virtio_gpu_cleanup_object(struct virtio_gpu_object *bo) drm_gem_free_mmap_offset(&vram->base.base.base); drm_gem_object_release(&vram->base.base.base); kfree(vram); + } else { + drm_gem_object_release(&bo->base.base); + kfree(bo); } } @@ -97,6 +100,27 @@ static void virtio_gpu_free_object(struct drm_gem_object *obj) virtio_gpu_cleanup_object(bo); } +int virtio_gpu_detach_object_fenced(struct virtio_gpu_object *bo) +{ + struct virtio_gpu_device *vgdev = bo->base.base.dev->dev_private; + struct virtio_gpu_fence *fence; + + if (!bo->attached) + return 0; + + fence = virtio_gpu_fence_alloc(vgdev, vgdev->fence_drv.context, 0); + if (!fence) + return -ENOMEM; + + virtio_gpu_object_detach(vgdev, bo, fence); + virtio_gpu_notify(vgdev); + + dma_fence_wait(&fence->f, false); + dma_fence_put(&fence->f); + + return 0; +} + static const struct drm_gem_object_funcs virtio_gpu_shmem_funcs = { .free = virtio_gpu_free_object, .open = virtio_gpu_gem_object_open, diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c index a72a2dbda031..42aa554eca9f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_plane.c +++ b/drivers/gpu/drm/virtio/virtgpu_plane.c @@ -26,6 +26,8 @@ #include <drm/drm_atomic_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_fourcc.h> +#include <drm/drm_gem_atomic_helper.h> +#include <linux/virtio_dma_buf.h> #include "virtgpu_drv.h" @@ -66,11 +68,28 @@ uint32_t virtio_gpu_translate_format(uint32_t drm_fourcc) return format; } +static struct +drm_plane_state *virtio_gpu_plane_duplicate_state(struct drm_plane *plane) +{ + struct virtio_gpu_plane_state *new; + + if (WARN_ON(!plane->state)) + return NULL; + + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return NULL; + + __drm_atomic_helper_plane_duplicate_state(plane, &new->base); + + return &new->base; +} + static const struct drm_plane_funcs virtio_gpu_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .reset = drm_atomic_helper_plane_reset, - .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, + .atomic_duplicate_state = virtio_gpu_plane_duplicate_state, .atomic_destroy_state = 
drm_atomic_helper_plane_destroy_state, }; @@ -138,11 +157,13 @@ static void virtio_gpu_resource_flush(struct drm_plane *plane, struct drm_device *dev = plane->dev; struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_plane_state *vgplane_st; struct virtio_gpu_object *bo; vgfb = to_virtio_gpu_framebuffer(plane->state->fb); + vgplane_st = to_virtio_gpu_plane_state(plane->state); bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]); - if (vgfb->fence) { + if (vgplane_st->fence) { struct virtio_gpu_object_array *objs; objs = virtio_gpu_array_alloc(1); @@ -151,13 +172,11 @@ static void virtio_gpu_resource_flush(struct drm_plane *plane, virtio_gpu_array_add_obj(objs, vgfb->base.obj[0]); virtio_gpu_array_lock_resv(objs); virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, - width, height, objs, vgfb->fence); + width, height, objs, + vgplane_st->fence); virtio_gpu_notify(vgdev); - - dma_fence_wait_timeout(&vgfb->fence->f, true, + dma_fence_wait_timeout(&vgplane_st->fence->f, true, msecs_to_jiffies(50)); - dma_fence_put(&vgfb->fence->f); - vgfb->fence = NULL; } else { virtio_gpu_cmd_resource_flush(vgdev, bo->hw_res_handle, x, y, width, height, NULL, NULL); @@ -241,45 +260,113 @@ static void virtio_gpu_primary_plane_update(struct drm_plane *plane, rect.y2 - rect.y1); } +static int virtio_gpu_prepare_imported_obj(struct drm_plane *plane, + struct drm_plane_state *new_state, + struct drm_gem_object *obj) +{ + struct virtio_gpu_device *vgdev = plane->dev->dev_private; + struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); + struct dma_buf_attachment *attach = obj->import_attach; + struct dma_resv *resv = attach->dmabuf->resv; + struct virtio_gpu_mem_entry *ents = NULL; + unsigned int nents; + int ret; + + dma_resv_lock(resv, NULL); + + ret = dma_buf_pin(attach); + if (ret) { + dma_resv_unlock(resv); + return ret; + } + + if (!bo->sgt) { + ret = virtgpu_dma_buf_import_sgt(&ents, &nents, + bo, attach); + if (ret) + goto err; + + virtio_gpu_object_attach(vgdev, bo, ents, nents); + } + + dma_resv_unlock(resv); + return 0; + +err: + dma_buf_unpin(attach); + dma_resv_unlock(resv); + return ret; +} + static int virtio_gpu_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *new_state) { struct drm_device *dev = plane->dev; struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_plane_state *vgplane_st; struct virtio_gpu_object *bo; + struct drm_gem_object *obj; + int ret; if (!new_state->fb) return 0; vgfb = to_virtio_gpu_framebuffer(new_state->fb); + vgplane_st = to_virtio_gpu_plane_state(new_state); bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]); + + drm_gem_plane_helper_prepare_fb(plane, new_state); + if (!bo || (plane->type == DRM_PLANE_TYPE_PRIMARY && !bo->guest_blob)) return 0; - if (bo->dumb && (plane->state->fb != new_state->fb)) { - vgfb->fence = virtio_gpu_fence_alloc(vgdev, vgdev->fence_drv.context, + obj = new_state->fb->obj[0]; + if (obj->import_attach) { + ret = virtio_gpu_prepare_imported_obj(plane, new_state, obj); + if (ret) + return ret; + } + + if (bo->dumb || obj->import_attach) { + vgplane_st->fence = virtio_gpu_fence_alloc(vgdev, + vgdev->fence_drv.context, 0); - if (!vgfb->fence) + if (!vgplane_st->fence) return -ENOMEM; } return 0; } +static void virtio_gpu_cleanup_imported_obj(struct drm_gem_object *obj) +{ + struct dma_buf_attachment *attach = obj->import_attach; + struct dma_resv *resv = attach->dmabuf->resv; + + dma_resv_lock(resv, NULL); + 
dma_buf_unpin(attach); + dma_resv_unlock(resv); +} + static void virtio_gpu_plane_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *state) { - struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_plane_state *vgplane_st; + struct drm_gem_object *obj; if (!state->fb) return; - vgfb = to_virtio_gpu_framebuffer(state->fb); - if (vgfb->fence) { - dma_fence_put(&vgfb->fence->f); - vgfb->fence = NULL; + vgplane_st = to_virtio_gpu_plane_state(state); + if (vgplane_st->fence) { + dma_fence_put(&vgplane_st->fence->f); + vgplane_st->fence = NULL; } + + obj = state->fb->obj[0]; + if (obj->import_attach) + virtio_gpu_cleanup_imported_obj(obj); } static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, @@ -291,6 +378,7 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_output *output = NULL; struct virtio_gpu_framebuffer *vgfb; + struct virtio_gpu_plane_state *vgplane_st; struct virtio_gpu_object *bo = NULL; uint32_t handle; @@ -303,6 +391,7 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, if (plane->state->fb) { vgfb = to_virtio_gpu_framebuffer(plane->state->fb); + vgplane_st = to_virtio_gpu_plane_state(plane->state); bo = gem_to_virtio_gpu_obj(vgfb->base.obj[0]); handle = bo->hw_res_handle; } else { @@ -322,11 +411,9 @@ static void virtio_gpu_cursor_plane_update(struct drm_plane *plane, (vgdev, 0, plane->state->crtc_w, plane->state->crtc_h, - 0, 0, objs, vgfb->fence); + 0, 0, objs, vgplane_st->fence); virtio_gpu_notify(vgdev); - dma_fence_wait(&vgfb->fence->f, true); - dma_fence_put(&vgfb->fence->f); - vgfb->fence = NULL; + dma_fence_wait(&vgplane_st->fence->f, true); } if (plane->state->fb != old_state->fb) { diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c index 44425f20d91a..b3664c12843d 100644 --- a/drivers/gpu/drm/virtio/virtgpu_prime.c +++ b/drivers/gpu/drm/virtio/virtgpu_prime.c @@ -27,6 +27,8 @@ #include "virtgpu_drv.h" +MODULE_IMPORT_NS("DMA_BUF"); + static int virtgpu_virtio_get_uuid(struct dma_buf *buf, uuid_t *uuid) { @@ -142,10 +144,159 @@ struct dma_buf *virtgpu_gem_prime_export(struct drm_gem_object *obj, return buf; } +int virtgpu_dma_buf_import_sgt(struct virtio_gpu_mem_entry **ents, + unsigned int *nents, + struct virtio_gpu_object *bo, + struct dma_buf_attachment *attach) +{ + struct scatterlist *sl; + struct sg_table *sgt; + long i, ret; + + dma_resv_assert_held(attach->dmabuf->resv); + + ret = dma_resv_wait_timeout(attach->dmabuf->resv, + DMA_RESV_USAGE_KERNEL, + false, MAX_SCHEDULE_TIMEOUT); + if (ret <= 0) + return ret < 0 ? 
ret : -ETIMEDOUT; + + sgt = dma_buf_map_attachment(attach, DMA_BIDIRECTIONAL); + if (IS_ERR(sgt)) + return PTR_ERR(sgt); + + *ents = kvmalloc_array(sgt->nents, + sizeof(struct virtio_gpu_mem_entry), + GFP_KERNEL); + if (!(*ents)) { + dma_buf_unmap_attachment(attach, sgt, DMA_BIDIRECTIONAL); + return -ENOMEM; + } + + *nents = sgt->nents; + for_each_sgtable_dma_sg(sgt, sl, i) { + (*ents)[i].addr = cpu_to_le64(sg_dma_address(sl)); + (*ents)[i].length = cpu_to_le32(sg_dma_len(sl)); + (*ents)[i].padding = 0; + } + + bo->sgt = sgt; + return 0; +} + +static void virtgpu_dma_buf_free_obj(struct drm_gem_object *obj) +{ + struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); + struct virtio_gpu_device *vgdev = obj->dev->dev_private; + struct dma_buf_attachment *attach = obj->import_attach; + struct dma_resv *resv = attach->dmabuf->resv; + + if (attach) { + dma_resv_lock(resv, NULL); + + virtio_gpu_detach_object_fenced(bo); + + if (bo->sgt) + dma_buf_unmap_attachment(attach, bo->sgt, + DMA_BIDIRECTIONAL); + + dma_resv_unlock(resv); + + dma_buf_detach(attach->dmabuf, attach); + dma_buf_put(attach->dmabuf); + } + + if (bo->created) { + virtio_gpu_cmd_unref_resource(vgdev, bo); + virtio_gpu_notify(vgdev); + return; + } + virtio_gpu_cleanup_object(bo); +} + +static int virtgpu_dma_buf_init_obj(struct drm_device *dev, + struct virtio_gpu_object *bo, + struct dma_buf_attachment *attach) +{ + struct virtio_gpu_device *vgdev = dev->dev_private; + struct virtio_gpu_object_params params = { 0 }; + struct dma_resv *resv = attach->dmabuf->resv; + struct virtio_gpu_mem_entry *ents = NULL; + unsigned int nents; + int ret; + + ret = virtio_gpu_resource_id_get(vgdev, &bo->hw_res_handle); + if (ret) { + virtgpu_dma_buf_free_obj(&bo->base.base); + return ret; + } + + dma_resv_lock(resv, NULL); + + ret = dma_buf_pin(attach); + if (ret) + goto err_pin; + + ret = virtgpu_dma_buf_import_sgt(&ents, &nents, bo, attach); + if (ret) + goto err_import; + + params.blob = true; + params.blob_mem = VIRTGPU_BLOB_MEM_GUEST; + params.blob_flags = VIRTGPU_BLOB_FLAG_USE_SHAREABLE; + params.size = attach->dmabuf->size; + + virtio_gpu_cmd_resource_create_blob(vgdev, bo, ¶ms, + ents, nents); + bo->guest_blob = true; + bo->attached = true; + + dma_buf_unpin(attach); + dma_resv_unlock(resv); + + return 0; + +err_import: + dma_buf_unpin(attach); +err_pin: + dma_resv_unlock(resv); + virtgpu_dma_buf_free_obj(&bo->base.base); + return ret; +} + +static const struct drm_gem_object_funcs virtgpu_gem_dma_buf_funcs = { + .free = virtgpu_dma_buf_free_obj, +}; + +static void virtgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->importer_priv; + struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj); + + if (bo->created && kref_read(&obj->refcount)) { + virtio_gpu_detach_object_fenced(bo); + + if (bo->sgt) + dma_buf_unmap_attachment(attach, bo->sgt, + DMA_BIDIRECTIONAL); + + bo->sgt = NULL; + } +} + +static const struct dma_buf_attach_ops virtgpu_dma_buf_attach_ops = { + .allow_peer2peer = true, + .move_notify = virtgpu_dma_buf_move_notify +}; + struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf) { + struct virtio_gpu_device *vgdev = dev->dev_private; + struct dma_buf_attachment *attach; + struct virtio_gpu_object *bo; struct drm_gem_object *obj; + int ret; if (buf->ops == &virtgpu_dmabuf_ops.ops) { obj = buf->priv; @@ -159,7 +310,32 @@ struct drm_gem_object *virtgpu_gem_prime_import(struct drm_device *dev, } } - return drm_gem_prime_import(dev, buf); 
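The import helper above turns each DMA-mapped scatterlist segment into a little-endian virtio_gpu_mem_entry before attaching the backing. A stand-alone sketch of that translation, with a hypothetical entry struct and made-up segment addresses, using the glibc endian helpers instead of the kernel's cpu_to_le*():

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for struct virtio_gpu_mem_entry: an address and a
 * length, both little-endian on the wire, plus explicit padding. */
struct mem_entry {
	uint64_t addr;
	uint32_t length;
	uint32_t padding;
};

int main(void)
{
	/* Pretend DMA-mapped segments as (address, length) pairs. */
	const uint64_t seg_addr[] = { 0x80000000ULL, 0x80200000ULL };
	const uint32_t seg_len[]  = { 0x100000, 0x80000 };
	struct mem_entry ents[2];

	for (int i = 0; i < 2; i++) {
		ents[i].addr = htole64(seg_addr[i]);
		ents[i].length = htole32(seg_len[i]);
		ents[i].padding = 0;
	}

	printf("%zu entries prepared\n", sizeof(ents) / sizeof(ents[0]));
	return 0;
}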
+ if (!vgdev->has_resource_blob || vgdev->has_virgl_3d) + return drm_gem_prime_import(dev, buf); + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + + obj = &bo->base.base; + obj->funcs = &virtgpu_gem_dma_buf_funcs; + drm_gem_private_object_init(dev, obj, buf->size); + + attach = dma_buf_dynamic_attach(buf, dev->dev, + &virtgpu_dma_buf_attach_ops, obj); + if (IS_ERR(attach)) { + kfree(bo); + return ERR_CAST(attach); + } + + obj->import_attach = attach; + get_dma_buf(buf); + + ret = virtgpu_dma_buf_init_obj(dev, bo, attach); + if (ret < 0) + return ERR_PTR(ret); + + return obj; } struct drm_gem_object *virtgpu_gem_prime_import_sg_table( diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 0d3d0d09f39b..ad91624df42d 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -645,6 +645,23 @@ virtio_gpu_cmd_resource_attach_backing(struct virtio_gpu_device *vgdev, virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence); } +static void +virtio_gpu_cmd_resource_detach_backing(struct virtio_gpu_device *vgdev, + uint32_t resource_id, + struct virtio_gpu_fence *fence) +{ + struct virtio_gpu_resource_detach_backing *cmd_p; + struct virtio_gpu_vbuffer *vbuf; + + cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p)); + memset(cmd_p, 0, sizeof(*cmd_p)); + + cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING); + cmd_p->resource_id = cpu_to_le32(resource_id); + + virtio_gpu_queue_fenced_ctrl_buffer(vgdev, vbuf, fence); +} + static void virtio_gpu_cmd_get_display_info_cb(struct virtio_gpu_device *vgdev, struct virtio_gpu_vbuffer *vbuf) { @@ -1103,8 +1120,26 @@ void virtio_gpu_object_attach(struct virtio_gpu_device *vgdev, struct virtio_gpu_mem_entry *ents, unsigned int nents) { + if (obj->attached) + return; + virtio_gpu_cmd_resource_attach_backing(vgdev, obj->hw_res_handle, ents, nents, NULL); + + obj->attached = true; +} + +void virtio_gpu_object_detach(struct virtio_gpu_device *vgdev, + struct virtio_gpu_object *obj, + struct virtio_gpu_fence *fence) +{ + if (!obj->attached) + return; + + virtio_gpu_cmd_resource_detach_backing(vgdev, obj->hw_res_handle, + fence); + + obj->attached = false; } void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev, diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index 3f0977d746be..b20ac1705726 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -24,64 +24,33 @@ static u16 pre_mul_blend_channel(u16 src, u16 dst, u16 alpha) /** * pre_mul_alpha_blend - alpha blending equation - * @frame_info: Source framebuffer's metadata * @stage_buffer: The line with the pixels from src_plane * @output_buffer: A line buffer that receives all the blends output + * @x_start: The start offset + * @pixel_count: The number of pixels to blend * - * Using the information from the `frame_info`, this blends only the - * necessary pixels from the `stage_buffer` to the `output_buffer` - * using premultiplied blend formula. + * The pixels [@x_start;@x_start+@pixel_count) in stage_buffer are blended at + * [@x_start;@x_start+@pixel_count) in output_buffer. * * The current DRM assumption is that pixel color values have been already * pre-multiplied with the alpha channel values. See more * drm_plane_create_blend_mode_property(). Also, this formula assumes a * completely opaque background. 
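The blending itself is plain per-channel arithmetic: with premultiplied source pixels over an opaque destination, each 16-bit channel becomes out = src + dst * (1 - alpha). A minimal stand-alone sketch of that formula, with illustrative values:

#include <stdint.h>
#include <stdio.h>

/* Premultiplied "source over opaque destination", per 16-bit channel,
 * rounded to nearest. Names and values here are illustrative only. */
static uint16_t blend_channel(uint16_t src, uint16_t dst, uint16_t alpha)
{
	uint64_t v = (uint64_t)src * 0xffff + (uint64_t)dst * (0xffff - alpha);

	return (uint16_t)((v + 0xffff / 2) / 0xffff);
}

int main(void)
{
	/* A 50%-alpha, premultiplied mid-grey pixel over a white background. */
	printf("0x%04x\n", blend_channel(0x4000, 0xffff, 0x8000));
	return 0;
}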
*/ -static void pre_mul_alpha_blend(struct vkms_frame_info *frame_info, - struct line_buffer *stage_buffer, - struct line_buffer *output_buffer) +static void pre_mul_alpha_blend(const struct line_buffer *stage_buffer, + struct line_buffer *output_buffer, int x_start, int pixel_count) { - int x_dst = frame_info->dst.x1; - struct pixel_argb_u16 *out = output_buffer->pixels + x_dst; - struct pixel_argb_u16 *in = stage_buffer->pixels; - int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), - stage_buffer->n_pixels); - - for (int x = 0; x < x_limit; x++) { - out[x].a = (u16)0xffff; - out[x].r = pre_mul_blend_channel(in[x].r, out[x].r, in[x].a); - out[x].g = pre_mul_blend_channel(in[x].g, out[x].g, in[x].a); - out[x].b = pre_mul_blend_channel(in[x].b, out[x].b, in[x].a); + struct pixel_argb_u16 *out = &output_buffer->pixels[x_start]; + const struct pixel_argb_u16 *in = &stage_buffer->pixels[x_start]; + + for (int i = 0; i < pixel_count; i++) { + out[i].a = (u16)0xffff; + out[i].r = pre_mul_blend_channel(in[i].r, out[i].r, in[i].a); + out[i].g = pre_mul_blend_channel(in[i].g, out[i].g, in[i].a); + out[i].b = pre_mul_blend_channel(in[i].b, out[i].b, in[i].a); } } -static int get_y_pos(struct vkms_frame_info *frame_info, int y) -{ - if (frame_info->rotation & DRM_MODE_REFLECT_Y) - return drm_rect_height(&frame_info->rotated) - y - 1; - - switch (frame_info->rotation & DRM_MODE_ROTATE_MASK) { - case DRM_MODE_ROTATE_90: - return frame_info->rotated.x2 - y - 1; - case DRM_MODE_ROTATE_270: - return y + frame_info->rotated.x1; - default: - return y; - } -} - -static bool check_limit(struct vkms_frame_info *frame_info, int pos) -{ - if (drm_rotation_90_or_270(frame_info->rotation)) { - if (pos >= 0 && pos < drm_rect_width(&frame_info->rotated)) - return true; - } else { - if (pos >= frame_info->rotated.y1 && pos < frame_info->rotated.y2) - return true; - } - - return false; -} static void fill_background(const struct pixel_argb_u16 *background_color, struct line_buffer *output_buffer) @@ -96,7 +65,7 @@ static u16 lerp_u16(u16 a, u16 b, s64 t) s64 a_fp = drm_int2fixp(a); s64 b_fp = drm_int2fixp(b); - s64 delta = drm_fixp_mul(b_fp - a_fp, t); + s64 delta = drm_fixp_mul(b_fp - a_fp, t); return drm_fixp2int(a_fp + delta); } @@ -164,6 +133,226 @@ static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff } /** + * direction_for_rotation() - Get the correct reading direction for a given rotation + * + * @rotation: Rotation to analyze. It correspond the field @frame_info.rotation. + * + * This function will use the @rotation setting of a source plane to compute the reading + * direction in this plane which correspond to a "left to right writing" in the CRTC. + * For example, if the buffer is reflected on X axis, the pixel must be read from right to left + * to be written from left to right on the CRTC. + */ +static enum pixel_read_direction direction_for_rotation(unsigned int rotation) +{ + struct drm_rect tmp_a, tmp_b; + int x, y; + + /* + * Points A and B are depicted as zero-size rectangles on the CRTC. + * The CRTC writing direction is from A to B. The plane reading direction + * is discovered by inverse-transforming A and B. + * The reading direction is computed by rotating the vector AB (top-left to top-right) in a + * 1x1 square. 
+ */ + + tmp_a = DRM_RECT_INIT(0, 0, 0, 0); + tmp_b = DRM_RECT_INIT(1, 0, 0, 0); + drm_rect_rotate_inv(&tmp_a, 1, 1, rotation); + drm_rect_rotate_inv(&tmp_b, 1, 1, rotation); + + x = tmp_b.x1 - tmp_a.x1; + y = tmp_b.y1 - tmp_a.y1; + + if (x == 1 && y == 0) + return READ_LEFT_TO_RIGHT; + else if (x == -1 && y == 0) + return READ_RIGHT_TO_LEFT; + else if (y == 1 && x == 0) + return READ_TOP_TO_BOTTOM; + else if (y == -1 && x == 0) + return READ_BOTTOM_TO_TOP; + + WARN_ONCE(true, "The inverse of the rotation gives an incorrect direction."); + return READ_LEFT_TO_RIGHT; +} + +/** + * clamp_line_coordinates() - Compute and clamp the coordinate to read and write during the blend + * process. + * + * @direction: direction of the reading + * @current_plane: current plane blended + * @src_line: source line of the reading. Only the top-left coordinate is used. This rectangle + * must be rotated and have a shape of 1*pixel_count if @direction is vertical and a shape of + * pixel_count*1 if @direction is horizontal. + * @src_x_start: x start coordinate for the line reading + * @src_y_start: y start coordinate for the line reading + * @dst_x_start: x coordinate to blend the read line + * @pixel_count: number of pixels to blend + * + * This function is mainly a safety net to avoid reading outside the source buffer. As the + * userspace should never ask to read outside the source plane, all the cases covered here should + * be dead code. + */ +static void clamp_line_coordinates(enum pixel_read_direction direction, + const struct vkms_plane_state *current_plane, + const struct drm_rect *src_line, int *src_x_start, + int *src_y_start, int *dst_x_start, int *pixel_count) +{ + /* By default the start points are correct */ + *src_x_start = src_line->x1; + *src_y_start = src_line->y1; + *dst_x_start = current_plane->frame_info->dst.x1; + + /* Get the correct number of pixel to blend, it depends of the direction */ + switch (direction) { + case READ_LEFT_TO_RIGHT: + case READ_RIGHT_TO_LEFT: + *pixel_count = drm_rect_width(src_line); + break; + case READ_BOTTOM_TO_TOP: + case READ_TOP_TO_BOTTOM: + *pixel_count = drm_rect_height(src_line); + break; + } + + /* + * Clamp the coordinates to avoid reading outside the buffer + * + * This is mainly a security check to avoid reading outside the buffer, the userspace + * should never request to read outside the source buffer. + */ + switch (direction) { + case READ_LEFT_TO_RIGHT: + case READ_RIGHT_TO_LEFT: + if (*src_x_start < 0) { + *pixel_count += *src_x_start; + *dst_x_start -= *src_x_start; + *src_x_start = 0; + } + if (*src_x_start + *pixel_count > current_plane->frame_info->fb->width) + *pixel_count = max(0, (int)current_plane->frame_info->fb->width - + *src_x_start); + break; + case READ_BOTTOM_TO_TOP: + case READ_TOP_TO_BOTTOM: + if (*src_y_start < 0) { + *pixel_count += *src_y_start; + *dst_x_start -= *src_y_start; + *src_y_start = 0; + } + if (*src_y_start + *pixel_count > current_plane->frame_info->fb->height) + *pixel_count = max(0, (int)current_plane->frame_info->fb->height - + *src_y_start); + break; + } +} + +/** + * blend_line() - Blend a line from a plane to the output buffer + * + * @current_plane: current plane to work on + * @y: line to write in the output buffer + * @crtc_x_limit: width of the output buffer + * @stage_buffer: temporary buffer to convert the pixel line from the source buffer + * @output_buffer: buffer to blend the read line into. 
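The clamping done by clamp_line_coordinates() is easiest to see with numbers. A stand-alone sketch with made-up coordinates: a 20-pixel line that starts 3 pixels left of a 15-pixel-wide source becomes a 15-pixel read starting at column 0, written 3 pixels further right:

#include <stdio.h>

int main(void)
{
	int src_x_start = -3, dst_x_start = 10, pixel_count = 20, fb_width = 15;

	/* Negative start: shorten the line and shift the destination. */
	if (src_x_start < 0) {
		pixel_count += src_x_start;	/* 20 - 3 = 17 */
		dst_x_start -= src_x_start;	/* 10 + 3 = 13 */
		src_x_start = 0;
	}
	/* End past the buffer: truncate to what the source really has. */
	if (src_x_start + pixel_count > fb_width)
		pixel_count = fb_width - src_x_start > 0 ?
			      fb_width - src_x_start : 0;	/* -> 15 */

	printf("src_x=%d dst_x=%d count=%d\n",
	       src_x_start, dst_x_start, pixel_count);
	return 0;
}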
+ */ +static void blend_line(struct vkms_plane_state *current_plane, int y, + int crtc_x_limit, struct line_buffer *stage_buffer, + struct line_buffer *output_buffer) +{ + int src_x_start, src_y_start, dst_x_start, pixel_count; + struct drm_rect dst_line, tmp_src, src_line; + + /* Avoid rendering useless lines */ + if (y < current_plane->frame_info->dst.y1 || + y >= current_plane->frame_info->dst.y2) + return; + + /* + * dst_line is the line to copy. The initial coordinates are inside the + * destination framebuffer, and then drm_rect_* helpers are used to + * compute the correct position into the source framebuffer. + */ + dst_line = DRM_RECT_INIT(current_plane->frame_info->dst.x1, y, + drm_rect_width(¤t_plane->frame_info->dst), + 1); + + drm_rect_fp_to_int(&tmp_src, ¤t_plane->frame_info->src); + + /* + * [1]: Clamping src_line to the crtc_x_limit to avoid writing outside of + * the destination buffer + */ + dst_line.x1 = max_t(int, dst_line.x1, 0); + dst_line.x2 = min_t(int, dst_line.x2, crtc_x_limit); + /* The destination is completely outside of the crtc. */ + if (dst_line.x2 <= dst_line.x1) + return; + + src_line = dst_line; + + /* + * Transform the coordinate x/y from the crtc to coordinates into + * coordinates for the src buffer. + * + * - Cancel the offset of the dst buffer. + * - Invert the rotation. This assumes that + * dst = drm_rect_rotate(src, rotation) (dst and src have the + * same size, but can be rotated). + * - Apply the offset of the source rectangle to the coordinate. + */ + drm_rect_translate(&src_line, -current_plane->frame_info->dst.x1, + -current_plane->frame_info->dst.y1); + drm_rect_rotate_inv(&src_line, drm_rect_width(&tmp_src), + drm_rect_height(&tmp_src), + current_plane->frame_info->rotation); + drm_rect_translate(&src_line, tmp_src.x1, tmp_src.y1); + + /* Get the correct reading direction in the source buffer. */ + + enum pixel_read_direction direction = + direction_for_rotation(current_plane->frame_info->rotation); + + /* [2]: Compute and clamp the number of pixel to read */ + clamp_line_coordinates(direction, current_plane, &src_line, &src_x_start, &src_y_start, + &dst_x_start, &pixel_count); + + if (pixel_count <= 0) { + /* Nothing to read, so avoid multiple function calls */ + return; + } + + /* + * Modify the starting point to take in account the rotation + * + * src_line is the top-left corner, so when reading READ_RIGHT_TO_LEFT or + * READ_BOTTOM_TO_TOP, it must be changed to the top-right/bottom-left + * corner. + */ + if (direction == READ_RIGHT_TO_LEFT) { + // src_x_start is now the right point + src_x_start += pixel_count - 1; + } else if (direction == READ_BOTTOM_TO_TOP) { + // src_y_start is now the bottom point + src_y_start += pixel_count - 1; + } + + /* + * Perform the conversion and the blending + * + * Here we know that the read line (x_start, y_start, pixel_count) is + * inside the source buffer [2] and we don't write outside the stage + * buffer [1]. 
+ */ + current_plane->pixel_read_line(current_plane, src_x_start, src_y_start, direction, + pixel_count, &stage_buffer->pixels[dst_x_start]); + + pre_mul_alpha_blend(stage_buffer, output_buffer, + dst_x_start, pixel_count); +} + +/** * blend - blend the pixels from all planes and compute crc * @wb: The writeback frame buffer metadata * @crtc_state: The crtc state @@ -183,32 +372,25 @@ static void blend(struct vkms_writeback_job *wb, { struct vkms_plane_state **plane = crtc_state->active_planes; u32 n_active_planes = crtc_state->num_active_planes; - int y_pos; const struct pixel_argb_u16 background_color = { .a = 0xffff }; - size_t crtc_y_limit = crtc_state->base.mode.vdisplay; + int crtc_y_limit = crtc_state->base.mode.vdisplay; + int crtc_x_limit = crtc_state->base.mode.hdisplay; /* * The planes are composed line-by-line to avoid heavy memory usage. It is a necessary * complexity to avoid poor blending performance. * - * The function vkms_compose_row() is used to read a line, pixel-by-pixel, into the staging - * buffer. + * The function pixel_read_line callback is used to read a line, using an efficient + * algorithm for a specific format, into the staging buffer. */ - for (size_t y = 0; y < crtc_y_limit; y++) { + for (int y = 0; y < crtc_y_limit; y++) { fill_background(&background_color, output_buffer); /* The active planes are composed associatively in z-order. */ for (size_t i = 0; i < n_active_planes; i++) { - y_pos = get_y_pos(plane[i]->frame_info, y); - - if (!check_limit(plane[i]->frame_info, y_pos)) - continue; - - vkms_compose_row(stage_buffer, plane[i], y_pos); - pre_mul_alpha_blend(plane[i]->frame_info, stage_buffer, - output_buffer); + blend_line(plane[i], y, crtc_x_limit, stage_buffer, output_buffer); } apply_lut(crtc_state, output_buffer); @@ -216,7 +398,7 @@ static void blend(struct vkms_writeback_job *wb, *crc32 = crc32_le(*crc32, (void *)output_buffer->pixels, row_size); if (wb) - vkms_writeback_row(wb, output_buffer, y_pos); + vkms_writeback_row(wb, output_buffer, y); } } @@ -227,7 +409,7 @@ static int check_format_funcs(struct vkms_crtc_state *crtc_state, u32 n_active_planes = crtc_state->num_active_planes; for (size_t i = 0; i < n_active_planes; i++) - if (!planes[i]->pixel_read) + if (!planes[i]->pixel_read_line) return -1; if (active_wb && !active_wb->pixel_write) @@ -309,8 +491,8 @@ free_stage_buffer: void vkms_composer_worker(struct work_struct *work) { struct vkms_crtc_state *crtc_state = container_of(work, - struct vkms_crtc_state, - composer_work); + struct vkms_crtc_state, + composer_work); struct drm_crtc *crtc = crtc_state->base.crtc; struct vkms_writeback_job *active_wb = crtc_state->active_writeback; struct vkms_output *out = drm_crtc_to_vkms_output(crtc); @@ -335,7 +517,7 @@ void vkms_composer_worker(struct work_struct *work) crtc_state->gamma_lut.base = (struct drm_color_lut *)crtc->state->gamma_lut->data; crtc_state->gamma_lut.lut_length = crtc->state->gamma_lut->length / sizeof(struct drm_color_lut); - max_lut_index_fp = drm_int2fixp(crtc_state->gamma_lut.lut_length - 1); + max_lut_index_fp = drm_int2fixp(crtc_state->gamma_lut.lut_length - 1); crtc_state->gamma_lut.channel_value2index_ratio = drm_fixp_div(max_lut_index_fp, u16_max_fp); @@ -374,7 +556,7 @@ void vkms_composer_worker(struct work_struct *work) drm_crtc_add_crc_entry(crtc, true, frame_start++, &crc32); } -static const char * const pipe_crc_sources[] = {"auto"}; +static const char *const pipe_crc_sources[] = { "auto" }; const char *const *vkms_get_crc_sources(struct drm_crtc *crtc, size_t 
*count) diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c index bbf080d32d2c..28a57ae109fc 100644 --- a/drivers/gpu/drm/vkms/vkms_crtc.c +++ b/drivers/gpu/drm/vkms/vkms_crtc.c @@ -186,8 +186,7 @@ static int vkms_crtc_atomic_check(struct drm_crtc *crtc, return ret; drm_for_each_plane_mask(plane, crtc->dev, crtc_state->plane_mask) { - plane_state = drm_atomic_get_existing_plane_state(crtc_state->state, - plane); + plane_state = drm_atomic_get_existing_plane_state(crtc_state->state, plane); WARN_ON(!plane_state); if (!plane_state->visible) @@ -203,8 +202,7 @@ static int vkms_crtc_atomic_check(struct drm_crtc *crtc, i = 0; drm_for_each_plane_mask(plane, crtc->dev, crtc_state->plane_mask) { - plane_state = drm_atomic_get_existing_plane_state(crtc_state->state, - plane); + plane_state = drm_atomic_get_existing_plane_state(crtc_state->state, plane); if (!plane_state->visible) continue; diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c index 2d1e95cb66e5..e0409aba9349 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.c +++ b/drivers/gpu/drm/vkms/vkms_drv.c @@ -13,10 +13,10 @@ #include <linux/platform_device.h> #include <linux/dma-mapping.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_gem.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> -#include <drm/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_shmem.h> #include <drm/drm_file.h> @@ -34,7 +34,6 @@ #define DRIVER_NAME "vkms" #define DRIVER_DESC "Virtual Kernel Mode Setting" -#define DRIVER_DATE "20180514" #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 @@ -82,8 +81,7 @@ static void vkms_atomic_commit_tail(struct drm_atomic_state *old_state) drm_atomic_helper_wait_for_flip_done(dev, old_state); for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i) { - struct vkms_crtc_state *vkms_state = - to_vkms_crtc_state(old_crtc_state); + struct vkms_crtc_state *vkms_state = to_vkms_crtc_state(old_crtc_state); flush_work(&vkms_state->composer_work); } @@ -117,7 +115,6 @@ static const struct drm_driver vkms_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, }; @@ -174,7 +171,7 @@ static int vkms_modeset_init(struct vkms_device *vkmsdev) dev->mode_config.preferred_depth = 0; dev->mode_config.helper_private = &vkms_mode_config_helpers; - return vkms_output_init(vkmsdev, 0); + return vkms_output_init(vkmsdev); } static int vkms_create(struct vkms_config *config) diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h index 672fe191e239..00541eff3d1b 100644 --- a/drivers/gpu/drm/vkms/vkms_drv.h +++ b/drivers/gpu/drm/vkms/vkms_drv.h @@ -39,12 +39,8 @@ struct vkms_frame_info { struct drm_framebuffer *fb; struct drm_rect src, dst; - struct drm_rect rotated; struct iosys_map map[DRM_FORMAT_MAX_PLANES]; unsigned int rotation; - unsigned int offset; - unsigned int pitch; - unsigned int cpp; }; struct pixel_argb_u16 { @@ -56,23 +52,65 @@ struct line_buffer { struct pixel_argb_u16 *pixels; }; +/** + * typedef pixel_write_t - These functions are used to read a pixel from a + * &struct pixel_argb_u16, convert it in a specific format and write it in the @out_pixel + * buffer. 
+ * + * @out_pixel: destination address to write the pixel + * @in_pixel: pixel to write + */ +typedef void (*pixel_write_t)(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel); + struct vkms_writeback_job { struct iosys_map data[DRM_FORMAT_MAX_PLANES]; struct vkms_frame_info wb_frame_info; - void (*pixel_write)(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel); + pixel_write_t pixel_write; }; /** + * enum pixel_read_direction - Enum used internally by VKMS to represent a reading direction in a + * plane. + */ +enum pixel_read_direction { + READ_BOTTOM_TO_TOP, + READ_TOP_TO_BOTTOM, + READ_RIGHT_TO_LEFT, + READ_LEFT_TO_RIGHT +}; + +struct vkms_plane_state; + +/** + * typedef pixel_read_line_t - These functions are used to read a pixel line in the source frame, + * convert it to `struct pixel_argb_u16` and write it to @out_pixel. + * + * @plane: plane used as source for the pixel value + * @x_start: X (width) coordinate of the first pixel to copy. The caller must ensure that x_start + * is non-negative and smaller than @plane->frame_info->fb->width. + * @y_start: Y (height) coordinate of the first pixel to copy. The caller must ensure that y_start + * is non-negative and smaller than @plane->frame_info->fb->height. + * @direction: direction to use for the copy, starting at @x_start/@y_start + * @count: number of pixels to copy + * @out_pixel: pointer where to write the pixel values. They will be written from @out_pixel[0] + * (included) to @out_pixel[@count] (excluded). The caller must ensure that out_pixel have a + * length of at least @count. + */ +typedef void (*pixel_read_line_t)(const struct vkms_plane_state *plane, int x_start, + int y_start, enum pixel_read_direction direction, int count, + struct pixel_argb_u16 out_pixel[]); + +/** * struct vkms_plane_state - Driver specific plane state * @base: base plane state * @frame_info: data required for composing computation - * @pixel_read: function to read a pixel in this plane. The creator of a struct vkms_plane_state - * must ensure that this pointer is valid + * @pixel_read_line: function to read a pixel line in this plane. The creator of a + * struct vkms_plane_state must ensure that this pointer is valid */ struct vkms_plane_state { struct drm_shadow_plane_state base; struct vkms_frame_info *frame_info; - void (*pixel_read)(u8 *src_buffer, struct pixel_argb_u16 *out_pixel); + pixel_read_line_t pixel_read_line; }; struct vkms_plane { @@ -212,21 +250,17 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, * vkms_output_init() - Initialize all sub-components needed for a VKMS device. * * @vkmsdev: VKMS device to initialize - * @index: CRTC which can be attached to the planes. The caller must ensure that - * @index is positive and less or equals to 31. */ -int vkms_output_init(struct vkms_device *vkmsdev, int index); +int vkms_output_init(struct vkms_device *vkmsdev); /** * vkms_plane_init() - Initialize a plane * * @vkmsdev: VKMS device containing the plane * @type: type of plane to initialize - * @index: CRTC which can be attached to the plane. The caller must ensure that - * @index is positive and less or equals to 31. 
*/ struct vkms_plane *vkms_plane_init(struct vkms_device *vkmsdev, - enum drm_plane_type type, int index); + enum drm_plane_type type); /* CRC Support */ const char *const *vkms_get_crc_sources(struct drm_crtc *crtc, @@ -238,7 +272,6 @@ int vkms_verify_crc_source(struct drm_crtc *crtc, const char *source_name, /* Composer Support */ void vkms_composer_worker(struct work_struct *work); void vkms_set_composer(struct vkms_output *out, bool enabled); -void vkms_compose_row(struct line_buffer *stage_buffer, struct vkms_plane_state *plane, int y); void vkms_writeback_row(struct vkms_writeback_job *wb, const struct line_buffer *src_buffer, int y); /* Writeback */ diff --git a/drivers/gpu/drm/vkms/vkms_formats.c b/drivers/gpu/drm/vkms/vkms_formats.c index e8a5cc235ebb..39b1d7c97d45 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.c +++ b/drivers/gpu/drm/vkms/vkms_formats.c @@ -10,21 +10,46 @@ #include "vkms_formats.h" /** - * pixel_offset() - Get the offset of the pixel at coordinates x/y in the first plane + * packed_pixels_offset() - Get the offset of the block containing the pixel at coordinates x/y * * @frame_info: Buffer metadata * @x: The x coordinate of the wanted pixel in the buffer * @y: The y coordinate of the wanted pixel in the buffer + * @plane_index: The index of the plane to use + * @offset: The returned offset inside the buffer of the block + * @rem_x: The returned X coordinate of the requested pixel in the block + * @rem_y: The returned Y coordinate of the requested pixel in the block * - * The caller must ensure that the framebuffer associated with this request uses a pixel format - * where block_h == block_w == 1. - * If this requirement is not fulfilled, the resulting offset can point to an other pixel or - * outside of the buffer. + * As some pixel formats store multiple pixels in a block (DRM_FORMAT_R* for example), some + * pixels are not individually addressable. This function return 3 values: the offset of the + * whole block, and the coordinate of the requested pixel inside this block. + * For example, if the format is DRM_FORMAT_R1 and the requested coordinate is 13,5, the offset + * will point to the byte 5*pitches + 13/8 (second byte of the 5th line), and the rem_x/rem_y + * coordinates will be (13 % 8, 5 % 1) = (5, 0) + * + * With this function, the caller just have to extract the correct pixel from the block. */ -static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int y) +static void packed_pixels_offset(const struct vkms_frame_info *frame_info, int x, int y, + int plane_index, int *offset, int *rem_x, int *rem_y) { - return frame_info->offset + (y * frame_info->pitch) - + (x * frame_info->cpp); + struct drm_framebuffer *fb = frame_info->fb; + const struct drm_format_info *format = frame_info->fb->format; + /* Directly using x and y to multiply pitches and format->ccp is not sufficient because + * in some formats a block can represent multiple pixels. + * + * Dividing x and y by the block size allows to extract the correct offset of the block + * containing the pixel. 
+ */ + + int block_x = x / drm_format_info_block_width(format, plane_index); + int block_y = y / drm_format_info_block_height(format, plane_index); + int block_pitch = fb->pitches[plane_index] * drm_format_info_block_height(format, + plane_index); + *rem_x = x % drm_format_info_block_width(format, plane_index); + *rem_y = y % drm_format_info_block_height(format, plane_index); + *offset = fb->offsets[plane_index] + + block_y * block_pitch + + block_x * format->char_per_block[plane_index]; } /** @@ -34,145 +59,266 @@ static size_t pixel_offset(const struct vkms_frame_info *frame_info, int x, int * @frame_info: Buffer metadata * @x: The x (width) coordinate inside the plane * @y: The y (height) coordinate inside the plane + * @plane_index: The index of the plane + * @addr: The returned pointer + * @rem_x: The returned X coordinate of the requested pixel in the block + * @rem_y: The returned Y coordinate of the requested pixel in the block * - * Takes the information stored in the frame_info, a pair of coordinates, and - * returns the address of the first color channel. - * This function assumes the channels are packed together, i.e. a color channel - * comes immediately after another in the memory. And therefore, this function - * doesn't work for YUV with chroma subsampling (e.g. YUV420 and NV21). + * Takes the information stored in the frame_info, a pair of coordinates, and returns the address + * of the block containing this pixel and the pixel position inside this block. * - * The caller must ensure that the framebuffer associated with this request uses a pixel format - * where block_h == block_w == 1, otherwise the returned pointer can be outside the buffer. + * See @packed_pixels_offset for details about rem_x/rem_y behavior. */ -static void *packed_pixels_addr(const struct vkms_frame_info *frame_info, - int x, int y) +static void packed_pixels_addr(const struct vkms_frame_info *frame_info, + int x, int y, int plane_index, u8 **addr, int *rem_x, + int *rem_y) { - size_t offset = pixel_offset(frame_info, x, y); + int offset; - return (u8 *)frame_info->map[0].vaddr + offset; + packed_pixels_offset(frame_info, x, y, plane_index, &offset, rem_x, rem_y); + *addr = (u8 *)frame_info->map[0].vaddr + offset; } -static void *get_packed_src_addr(const struct vkms_frame_info *frame_info, int y) +/** + * get_block_step_bytes() - Common helper to compute the correct step value between each pixel block + * to read in a certain direction. + * + * @fb: Framebuffer to iter on + * @direction: Direction of the reading + * @plane_index: Plane to get the step from + * + * As the returned count is the number of bytes between two consecutive blocks in a direction, + * the caller may have to read multiple pixels before using the next one (for example, to read from + * left to right in a DRM_FORMAT_R1 plane, each block contains 8 pixels, so the step must be used + * only every 8 pixels). 
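Both helpers reduce to simple block arithmetic. A stand-alone sketch using DRM_FORMAT_R1-like parameters (8x1-pixel blocks, one byte per block) and a made-up 16-byte pitch, reproducing the (13, 5) example from the comment above; the driver additionally adds the plane's fb->offsets[] base:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the few drm_format_info fields that matter. */
struct fmt {
	int block_w, block_h;	/* pixels per block */
	int char_per_block;	/* bytes per block */
};

static void block_offset(const struct fmt *f, unsigned int pitch,
			 int x, int y, int *offset, int *rem_x, int *rem_y)
{
	int block_x = x / f->block_w;
	int block_y = y / f->block_h;
	int block_pitch = pitch * f->block_h;

	*rem_x = x % f->block_w;
	*rem_y = y % f->block_h;
	*offset = block_y * block_pitch + block_x * f->char_per_block;
}

int main(void)
{
	const struct fmt r1 = { .block_w = 8, .block_h = 1, .char_per_block = 1 };
	int offset, rem_x, rem_y;

	/* Pixel (13, 5) with a 16-byte pitch: offset 5*16 + 13/8 = 81, and the
	 * pixel sits at position rem_x = 13 % 8 = 5 inside that byte. */
	block_offset(&r1, 16, 13, 5, &offset, &rem_x, &rem_y);
	printf("offset=%d rem_x=%d rem_y=%d\n", offset, rem_x, rem_y);
	return 0;
}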
+ */ +static int get_block_step_bytes(struct drm_framebuffer *fb, enum pixel_read_direction direction, + int plane_index) { - int x_src = frame_info->src.x1 >> 16; - int y_src = y - frame_info->rotated.y1 + (frame_info->src.y1 >> 16); + switch (direction) { + case READ_LEFT_TO_RIGHT: + return fb->format->char_per_block[plane_index]; + case READ_RIGHT_TO_LEFT: + return -fb->format->char_per_block[plane_index]; + case READ_TOP_TO_BOTTOM: + return (int)fb->pitches[plane_index] * drm_format_info_block_width(fb->format, + plane_index); + case READ_BOTTOM_TO_TOP: + return -(int)fb->pitches[plane_index] * drm_format_info_block_width(fb->format, + plane_index); + } - return packed_pixels_addr(frame_info, x_src, y_src); + return 0; } -static int get_x_position(const struct vkms_frame_info *frame_info, int limit, int x) +/** + * packed_pixels_addr_1x1() - Get the pointer to the block containing the pixel at the given + * coordinates + * + * @frame_info: Buffer metadata + * @x: The x (width) coordinate inside the plane + * @y: The y (height) coordinate inside the plane + * @plane_index: The index of the plane + * @addr: The returned pointer + * + * This function can only be used with format where block_h == block_w == 1. + */ +static void packed_pixels_addr_1x1(const struct vkms_frame_info *frame_info, + int x, int y, int plane_index, u8 **addr) { - if (frame_info->rotation & (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_270)) - return limit - x - 1; - return x; + int offset, rem_x, rem_y; + + WARN_ONCE(drm_format_info_block_width(frame_info->fb->format, + plane_index) != 1, + "%s() only support formats with block_w == 1", __func__); + WARN_ONCE(drm_format_info_block_height(frame_info->fb->format, + plane_index) != 1, + "%s() only support formats with block_h == 1", __func__); + + packed_pixels_offset(frame_info, x, y, plane_index, &offset, &rem_x, + &rem_y); + *addr = (u8 *)frame_info->map[0].vaddr + offset; } /* - * The following functions take pixel data from the buffer and convert them to the format - * ARGB16161616 in @out_pixel. + * The following functions take pixel data (a, r, g, b, pixel, ...) and convert them to + * &struct pixel_argb_u16 * - * They are used in the vkms_compose_row() function to handle multiple formats. + * They are used in the `read_line`s functions to avoid duplicate work for some pixel formats. */ -static void ARGB8888_to_argb_u16(u8 *src_pixels, struct pixel_argb_u16 *out_pixel) +static struct pixel_argb_u16 argb_u16_from_u8888(u8 a, u8 r, u8 g, u8 b) { + struct pixel_argb_u16 out_pixel; /* * The 257 is the "conversion ratio". This number is obtained by the * (2^16 - 1) / (2^8 - 1) division. Which, in this case, tries to get * the best color value in a pixel format with more possibilities. * A similar idea applies to others RGB color conversions. 
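The same ratios can be checked in isolation: 8-bit channels expand exactly by 257, while the 5- and 6-bit RGB565 channels are scaled by 65535/31 and 65535/63 with rounding. A stand-alone sketch using integer rounding instead of the driver's drm_fixp helpers:

#include <stdint.h>
#include <stdio.h>

static uint16_t expand8(uint8_t v)
{
	return (uint16_t)v * 257;	/* 257 = 65535 / 255 exactly */
}

static uint16_t expand_bits(uint16_t v, unsigned int bits)
{
	uint32_t max_in = (1u << bits) - 1;

	return (uint16_t)((v * 65535u + max_in / 2) / max_in);
}

int main(void)
{
	uint16_t rgb565 = 0xF81F;	/* magenta: r=0x1f, g=0x00, b=0x1f */
	uint16_t r = expand_bits((rgb565 >> 11) & 0x1f, 5);
	uint16_t g = expand_bits((rgb565 >> 5) & 0x3f, 6);
	uint16_t b = expand_bits(rgb565 & 0x1f, 5);

	printf("a=0x%04x r=0x%04x g=0x%04x b=0x%04x\n", 0xffff, r, g, b);
	printf("0xff -> 0x%04x\n", expand8(0xff));	/* 0xffff */
	return 0;
}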
*/ - out_pixel->a = (u16)src_pixels[3] * 257; - out_pixel->r = (u16)src_pixels[2] * 257; - out_pixel->g = (u16)src_pixels[1] * 257; - out_pixel->b = (u16)src_pixels[0] * 257; -} + out_pixel.a = (u16)a * 257; + out_pixel.r = (u16)r * 257; + out_pixel.g = (u16)g * 257; + out_pixel.b = (u16)b * 257; -static void XRGB8888_to_argb_u16(u8 *src_pixels, struct pixel_argb_u16 *out_pixel) -{ - out_pixel->a = (u16)0xffff; - out_pixel->r = (u16)src_pixels[2] * 257; - out_pixel->g = (u16)src_pixels[1] * 257; - out_pixel->b = (u16)src_pixels[0] * 257; + return out_pixel; } -static void ARGB16161616_to_argb_u16(u8 *src_pixels, struct pixel_argb_u16 *out_pixel) +static struct pixel_argb_u16 argb_u16_from_u16161616(u16 a, u16 r, u16 g, u16 b) { - __le16 *pixels = (__force __le16 *)src_pixels; + struct pixel_argb_u16 out_pixel; - out_pixel->a = le16_to_cpu(pixels[3]); - out_pixel->r = le16_to_cpu(pixels[2]); - out_pixel->g = le16_to_cpu(pixels[1]); - out_pixel->b = le16_to_cpu(pixels[0]); + out_pixel.a = a; + out_pixel.r = r; + out_pixel.g = g; + out_pixel.b = b; + + return out_pixel; } -static void XRGB16161616_to_argb_u16(u8 *src_pixels, struct pixel_argb_u16 *out_pixel) +static struct pixel_argb_u16 argb_u16_from_le16161616(__le16 a, __le16 r, __le16 g, __le16 b) { - __le16 *pixels = (__force __le16 *)src_pixels; - - out_pixel->a = (u16)0xffff; - out_pixel->r = le16_to_cpu(pixels[2]); - out_pixel->g = le16_to_cpu(pixels[1]); - out_pixel->b = le16_to_cpu(pixels[0]); + return argb_u16_from_u16161616(le16_to_cpu(a), le16_to_cpu(r), le16_to_cpu(g), + le16_to_cpu(b)); } -static void RGB565_to_argb_u16(u8 *src_pixels, struct pixel_argb_u16 *out_pixel) +static struct pixel_argb_u16 argb_u16_from_RGB565(const __le16 *pixel) { - __le16 *pixels = (__force __le16 *)src_pixels; + struct pixel_argb_u16 out_pixel; s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31)); s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63)); - u16 rgb_565 = le16_to_cpu(*pixels); + u16 rgb_565 = le16_to_cpu(*pixel); s64 fp_r = drm_int2fixp((rgb_565 >> 11) & 0x1f); s64 fp_g = drm_int2fixp((rgb_565 >> 5) & 0x3f); s64 fp_b = drm_int2fixp(rgb_565 & 0x1f); - out_pixel->a = (u16)0xffff; - out_pixel->r = drm_fixp2int_round(drm_fixp_mul(fp_r, fp_rb_ratio)); - out_pixel->g = drm_fixp2int_round(drm_fixp_mul(fp_g, fp_g_ratio)); - out_pixel->b = drm_fixp2int_round(drm_fixp_mul(fp_b, fp_rb_ratio)); + out_pixel.a = (u16)0xffff; + out_pixel.r = drm_fixp2int_round(drm_fixp_mul(fp_r, fp_rb_ratio)); + out_pixel.g = drm_fixp2int_round(drm_fixp_mul(fp_g, fp_g_ratio)); + out_pixel.b = drm_fixp2int_round(drm_fixp_mul(fp_b, fp_rb_ratio)); + + return out_pixel; } -/** - * vkms_compose_row - compose a single row of a plane - * @stage_buffer: output line with the composed pixels - * @plane: state of the plane that is being composed - * @y: y coordinate of the row +/* + * The following functions are read_line function for each pixel format supported by VKMS. + * + * They read a line starting at the point @x_start,@y_start following the @direction. The result + * is stored in @out_pixel and in the format ARGB16161616. + * + * These functions are very repetitive, but the innermost pixel loops must be kept inside these + * functions for performance reasons. Some benchmarking was done in [1] where having the innermost + * loop factored out of these functions showed a slowdown by a factor of three. * - * This function composes a single row of a plane. 
It gets the source pixels - * through the y coordinate (see get_packed_src_addr()) and goes linearly - * through the source pixel, reading the pixels and converting it to - * ARGB16161616 (see the pixel_read() callback). For rotate-90 and rotate-270, - * the source pixels are not traversed linearly. The source pixels are queried - * on each iteration in order to traverse the pixels vertically. + * [1]: https://lore.kernel.org/dri-devel/d258c8dc-78e9-4509-9037-a98f7f33b3a3@riseup.net/ */ -void vkms_compose_row(struct line_buffer *stage_buffer, struct vkms_plane_state *plane, int y) + +static void ARGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, + enum pixel_read_direction direction, int count, + struct pixel_argb_u16 out_pixel[]) { - struct pixel_argb_u16 *out_pixels = stage_buffer->pixels; - struct vkms_frame_info *frame_info = plane->frame_info; - u8 *src_pixels = get_packed_src_addr(frame_info, y); - int limit = min_t(size_t, drm_rect_width(&frame_info->dst), stage_buffer->n_pixels); + struct pixel_argb_u16 *end = out_pixel + count; + u8 *src_pixels; - for (size_t x = 0; x < limit; x++, src_pixels += frame_info->cpp) { - int x_pos = get_x_position(frame_info, limit, x); + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); - if (drm_rotation_90_or_270(frame_info->rotation)) - src_pixels = get_packed_src_addr(frame_info, x + frame_info->rotated.y1) - + frame_info->cpp * y; + int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); - plane->pixel_read(src_pixels, &out_pixels[x_pos]); + while (out_pixel < end) { + u8 *px = (u8 *)src_pixels; + *out_pixel = argb_u16_from_u8888(px[3], px[2], px[1], px[0]); + out_pixel += 1; + src_pixels += step; + } +} + +static void XRGB8888_read_line(const struct vkms_plane_state *plane, int x_start, int y_start, + enum pixel_read_direction direction, int count, + struct pixel_argb_u16 out_pixel[]) +{ + struct pixel_argb_u16 *end = out_pixel + count; + u8 *src_pixels; + + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); + + int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); + + while (out_pixel < end) { + u8 *px = (u8 *)src_pixels; + *out_pixel = argb_u16_from_u8888(255, px[2], px[1], px[0]); + out_pixel += 1; + src_pixels += step; + } +} + +static void ARGB16161616_read_line(const struct vkms_plane_state *plane, int x_start, + int y_start, enum pixel_read_direction direction, int count, + struct pixel_argb_u16 out_pixel[]) +{ + struct pixel_argb_u16 *end = out_pixel + count; + u8 *src_pixels; + + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); + + int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); + + while (out_pixel < end) { + u16 *px = (u16 *)src_pixels; + *out_pixel = argb_u16_from_u16161616(px[3], px[2], px[1], px[0]); + out_pixel += 1; + src_pixels += step; + } +} + +static void XRGB16161616_read_line(const struct vkms_plane_state *plane, int x_start, + int y_start, enum pixel_read_direction direction, int count, + struct pixel_argb_u16 out_pixel[]) +{ + struct pixel_argb_u16 *end = out_pixel + count; + u8 *src_pixels; + + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); + + int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); + + while (out_pixel < end) { + __le16 *px = (__le16 *)src_pixels; + *out_pixel = argb_u16_from_le16161616(cpu_to_le16(0xFFFF), px[2], px[1], px[0]); + out_pixel += 1; + src_pixels += step; + } +} + +static void 
RGB565_read_line(const struct vkms_plane_state *plane, int x_start, + int y_start, enum pixel_read_direction direction, int count, + struct pixel_argb_u16 out_pixel[]) +{ + struct pixel_argb_u16 *end = out_pixel + count; + u8 *src_pixels; + + packed_pixels_addr_1x1(plane->frame_info, x_start, y_start, 0, &src_pixels); + + int step = get_block_step_bytes(plane->frame_info->fb, direction, 0); + + while (out_pixel < end) { + __le16 *px = (__le16 *)src_pixels; + + *out_pixel = argb_u16_from_RGB565(px); + out_pixel += 1; + src_pixels += step; } } /* * The following functions take one &struct pixel_argb_u16 and convert it to a specific format. - * The result is stored in @dst_pixels. + * The result is stored in @out_pixel. * * They are used in vkms_writeback_row() to convert and store a pixel from the src_buffer to * the writeback buffer. */ -static void argb_u16_to_ARGB8888(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) +static void argb_u16_to_ARGB8888(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { /* * This sequence below is important because the format's byte order is @@ -184,43 +330,43 @@ static void argb_u16_to_ARGB8888(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel * | Addr + 2 | = Red channel * | Addr + 3 | = Alpha channel */ - dst_pixels[3] = DIV_ROUND_CLOSEST(in_pixel->a, 257); - dst_pixels[2] = DIV_ROUND_CLOSEST(in_pixel->r, 257); - dst_pixels[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257); - dst_pixels[0] = DIV_ROUND_CLOSEST(in_pixel->b, 257); + out_pixel[3] = DIV_ROUND_CLOSEST(in_pixel->a, 257); + out_pixel[2] = DIV_ROUND_CLOSEST(in_pixel->r, 257); + out_pixel[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257); + out_pixel[0] = DIV_ROUND_CLOSEST(in_pixel->b, 257); } -static void argb_u16_to_XRGB8888(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) +static void argb_u16_to_XRGB8888(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { - dst_pixels[3] = 0xff; - dst_pixels[2] = DIV_ROUND_CLOSEST(in_pixel->r, 257); - dst_pixels[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257); - dst_pixels[0] = DIV_ROUND_CLOSEST(in_pixel->b, 257); + out_pixel[3] = 0xff; + out_pixel[2] = DIV_ROUND_CLOSEST(in_pixel->r, 257); + out_pixel[1] = DIV_ROUND_CLOSEST(in_pixel->g, 257); + out_pixel[0] = DIV_ROUND_CLOSEST(in_pixel->b, 257); } -static void argb_u16_to_ARGB16161616(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) +static void argb_u16_to_ARGB16161616(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { - __le16 *pixels = (__force __le16 *)dst_pixels; + __le16 *pixel = (__le16 *)out_pixel; - pixels[3] = cpu_to_le16(in_pixel->a); - pixels[2] = cpu_to_le16(in_pixel->r); - pixels[1] = cpu_to_le16(in_pixel->g); - pixels[0] = cpu_to_le16(in_pixel->b); + pixel[3] = cpu_to_le16(in_pixel->a); + pixel[2] = cpu_to_le16(in_pixel->r); + pixel[1] = cpu_to_le16(in_pixel->g); + pixel[0] = cpu_to_le16(in_pixel->b); } -static void argb_u16_to_XRGB16161616(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) +static void argb_u16_to_XRGB16161616(u8 *out_pixel, const struct pixel_argb_u16 *in_pixel) { - __le16 *pixels = (__force __le16 *)dst_pixels; + __le16 *pixel = (__le16 *)out_pixel; - pixels[3] = cpu_to_le16(0xffff); - pixels[2] = cpu_to_le16(in_pixel->r); - pixels[1] = cpu_to_le16(in_pixel->g); - pixels[0] = cpu_to_le16(in_pixel->b); + pixel[3] = cpu_to_le16(0xffff); + pixel[2] = cpu_to_le16(in_pixel->r); + pixel[1] = cpu_to_le16(in_pixel->g); + pixel[0] = cpu_to_le16(in_pixel->b); } -static void argb_u16_to_RGB565(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) +static void argb_u16_to_RGB565(u8 *out_pixel, const struct 
pixel_argb_u16 *in_pixel) { - __le16 *pixels = (__force __le16 *)dst_pixels; + __le16 *pixel = (__le16 *)out_pixel; s64 fp_rb_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(31)); s64 fp_g_ratio = drm_fixp_div(drm_int2fixp(65535), drm_int2fixp(63)); @@ -233,7 +379,7 @@ static void argb_u16_to_RGB565(u8 *dst_pixels, struct pixel_argb_u16 *in_pixel) u16 g = drm_fixp2int(drm_fixp_div(fp_g, fp_g_ratio)); u16 b = drm_fixp2int(drm_fixp_div(fp_b, fp_rb_ratio)); - *pixels = cpu_to_le16(r << 11 | g << 5 | b); + *pixel = cpu_to_le16(r << 11 | g << 5 | b); } /** @@ -249,36 +395,47 @@ void vkms_writeback_row(struct vkms_writeback_job *wb, { struct vkms_frame_info *frame_info = &wb->wb_frame_info; int x_dst = frame_info->dst.x1; - u8 *dst_pixels = packed_pixels_addr(frame_info, x_dst, y); + u8 *dst_pixels; + int rem_x, rem_y; + + packed_pixels_addr(frame_info, x_dst, y, 0, &dst_pixels, &rem_x, &rem_y); struct pixel_argb_u16 *in_pixels = src_buffer->pixels; int x_limit = min_t(size_t, drm_rect_width(&frame_info->dst), src_buffer->n_pixels); - for (size_t x = 0; x < x_limit; x++, dst_pixels += frame_info->cpp) + for (size_t x = 0; x < x_limit; x++, dst_pixels += frame_info->fb->format->cpp[0]) wb->pixel_write(dst_pixels, &in_pixels[x]); } /** - * get_pixel_conversion_function() - Retrieve the correct read_pixel function for a specific + * get_pixel_read_line_function() - Retrieve the correct read_line function for a specific * format. The returned pointer is NULL for unsupported pixel formats. The caller must ensure that * the pointer is valid before using it in a vkms_plane_state. * * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h]) */ -void *get_pixel_conversion_function(u32 format) +pixel_read_line_t get_pixel_read_line_function(u32 format) { switch (format) { case DRM_FORMAT_ARGB8888: - return &ARGB8888_to_argb_u16; + return &ARGB8888_read_line; case DRM_FORMAT_XRGB8888: - return &XRGB8888_to_argb_u16; + return &XRGB8888_read_line; case DRM_FORMAT_ARGB16161616: - return &ARGB16161616_to_argb_u16; + return &ARGB16161616_read_line; case DRM_FORMAT_XRGB16161616: - return &XRGB16161616_to_argb_u16; + return &XRGB16161616_read_line; case DRM_FORMAT_RGB565: - return &RGB565_to_argb_u16; + return &RGB565_read_line; default: - return NULL; + /* + * This is a bug in vkms_plane_atomic_check(). All the supported + * format must: + * - Be listed in vkms_formats in vkms_plane.c + * - Have a pixel_read callback defined here + */ + pr_err("Pixel format %p4cc is not supported by VKMS planes. This is a kernel bug, atomic check must forbid this configuration.\n", + &format); + BUG(); } } @@ -289,7 +446,7 @@ void *get_pixel_conversion_function(u32 format) * * @format: DRM_FORMAT_* value for which to obtain a conversion function (see [drm_fourcc.h]) */ -void *get_pixel_write_function(u32 format) +pixel_write_t get_pixel_write_function(u32 format) { switch (format) { case DRM_FORMAT_ARGB8888: @@ -303,6 +460,14 @@ void *get_pixel_write_function(u32 format) case DRM_FORMAT_RGB565: return &argb_u16_to_RGB565; default: - return NULL; + /* + * This is a bug in vkms_writeback_atomic_check. All the supported + * format must: + * - Be listed in vkms_wb_formats in vkms_writeback.c + * - Have a pixel_write callback defined here + */ + pr_err("Pixel format %p4cc is not supported by VKMS writeback. 
This is a kernel bug, atomic check must forbid this configuration.\n", + &format); + BUG(); } } diff --git a/drivers/gpu/drm/vkms/vkms_formats.h b/drivers/gpu/drm/vkms/vkms_formats.h index cf59c2ed8e9a..8d2bef95ff79 100644 --- a/drivers/gpu/drm/vkms/vkms_formats.h +++ b/drivers/gpu/drm/vkms/vkms_formats.h @@ -5,8 +5,8 @@ #include "vkms_drv.h" -void *get_pixel_conversion_function(u32 format); +pixel_read_line_t get_pixel_read_line_function(u32 format); -void *get_pixel_write_function(u32 format); +pixel_write_t get_pixel_write_function(u32 format); #endif /* _VKMS_FORMATS_H_ */ diff --git a/drivers/gpu/drm/vkms/vkms_output.c b/drivers/gpu/drm/vkms/vkms_output.c index 25a99fde126c..8f4bd5aef087 100644 --- a/drivers/gpu/drm/vkms/vkms_output.c +++ b/drivers/gpu/drm/vkms/vkms_output.c @@ -32,29 +32,14 @@ static const struct drm_connector_helper_funcs vkms_conn_helper_funcs = { .get_modes = vkms_conn_get_modes, }; -static int vkms_add_overlay_plane(struct vkms_device *vkmsdev, int index, - struct drm_crtc *crtc) -{ - struct vkms_plane *overlay; - - overlay = vkms_plane_init(vkmsdev, DRM_PLANE_TYPE_OVERLAY, index); - if (IS_ERR(overlay)) - return PTR_ERR(overlay); - - if (!overlay->base.possible_crtcs) - overlay->base.possible_crtcs = drm_crtc_mask(crtc); - - return 0; -} - -int vkms_output_init(struct vkms_device *vkmsdev, int index) +int vkms_output_init(struct vkms_device *vkmsdev) { struct vkms_output *output = &vkmsdev->output; struct drm_device *dev = &vkmsdev->drm; struct drm_connector *connector = &output->connector; struct drm_encoder *encoder = &output->encoder; struct drm_crtc *crtc = &output->crtc; - struct vkms_plane *primary, *cursor = NULL; + struct vkms_plane *primary, *overlay, *cursor = NULL; int ret; int writeback; unsigned int n; @@ -65,29 +50,31 @@ int vkms_output_init(struct vkms_device *vkmsdev, int index) * The overlay and cursor planes are not mandatory, but can be used to perform complex * composition. */ - primary = vkms_plane_init(vkmsdev, DRM_PLANE_TYPE_PRIMARY, index); + primary = vkms_plane_init(vkmsdev, DRM_PLANE_TYPE_PRIMARY); if (IS_ERR(primary)) return PTR_ERR(primary); - if (vkmsdev->config->overlay) { - for (n = 0; n < NUM_OVERLAY_PLANES; n++) { - ret = vkms_add_overlay_plane(vkmsdev, index, crtc); - if (ret) - return ret; - } - } - if (vkmsdev->config->cursor) { - cursor = vkms_plane_init(vkmsdev, DRM_PLANE_TYPE_CURSOR, index); + cursor = vkms_plane_init(vkmsdev, DRM_PLANE_TYPE_CURSOR); if (IS_ERR(cursor)) return PTR_ERR(cursor); } - /* [1]: Allocation of a CRTC, its index will be BIT(0) = 1 */ ret = vkms_crtc_init(dev, crtc, &primary->base, &cursor->base); if (ret) return ret; + if (vkmsdev->config->overlay) { + for (n = 0; n < NUM_OVERLAY_PLANES; n++) { + overlay = vkms_plane_init(vkmsdev, DRM_PLANE_TYPE_OVERLAY); + if (IS_ERR(overlay)) { + DRM_DEV_ERROR(dev->dev, "Failed to init vkms plane\n"); + return PTR_ERR(overlay); + } + overlay->base.possible_crtcs = drm_crtc_mask(crtc); + } + } + ret = drm_connector_init(dev, connector, &vkms_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); if (ret) { @@ -103,11 +90,7 @@ int vkms_output_init(struct vkms_device *vkmsdev, int index) DRM_ERROR("Failed to init encoder\n"); goto err_encoder; } - /* - * This is a hardcoded value to select crtc for the encoder. 
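The hunk just below drops the hardcoded BIT(0) in favour of drm_crtc_mask(crtc), which derives the same bit from the CRTC's own index. A minimal sketch of that equivalence; the struct here is a simplified stand-in for illustration, not the real drm_crtc.

    #include <assert.h>
    #include <stdint.h>

    /* Simplified stand-in: only the index registered for the CRTC matters. */
    struct fake_crtc {
            unsigned int index;
    };

    /* Mirrors the idea of drm_crtc_mask(): one bit per registered CRTC. */
    static uint32_t crtc_mask(const struct fake_crtc *crtc)
    {
            return UINT32_C(1) << crtc->index;
    }

    int main(void)
    {
            struct fake_crtc first = { .index = 0 };

            /* For the single VKMS CRTC the result is still BIT(0), but it no
             * longer depends on the registration order being hardcoded. */
            assert(crtc_mask(&first) == 0x1);
            return 0;
    }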
- * BIT(0) here designate the first registered CRTC, the one allocated in [1] - */ - encoder->possible_crtcs = BIT(0); + encoder->possible_crtcs = drm_crtc_mask(crtc); ret = drm_connector_attach_encoder(connector, encoder); if (ret) { diff --git a/drivers/gpu/drm/vkms/vkms_plane.c b/drivers/gpu/drm/vkms/vkms_plane.c index e5c625ab8e3e..e2fce471870f 100644 --- a/drivers/gpu/drm/vkms/vkms_plane.c +++ b/drivers/gpu/drm/vkms/vkms_plane.c @@ -112,23 +112,12 @@ static void vkms_plane_atomic_update(struct drm_plane *plane, frame_info = vkms_plane_state->frame_info; memcpy(&frame_info->src, &new_state->src, sizeof(struct drm_rect)); memcpy(&frame_info->dst, &new_state->dst, sizeof(struct drm_rect)); - memcpy(&frame_info->rotated, &new_state->dst, sizeof(struct drm_rect)); frame_info->fb = fb; memcpy(&frame_info->map, &shadow_plane_state->data, sizeof(frame_info->map)); drm_framebuffer_get(frame_info->fb); - frame_info->rotation = drm_rotation_simplify(new_state->rotation, DRM_MODE_ROTATE_0 | - DRM_MODE_ROTATE_90 | - DRM_MODE_ROTATE_270 | - DRM_MODE_REFLECT_X | - DRM_MODE_REFLECT_Y); - - drm_rect_rotate(&frame_info->rotated, drm_rect_width(&frame_info->rotated), - drm_rect_height(&frame_info->rotated), frame_info->rotation); - - frame_info->offset = fb->offsets[0]; - frame_info->pitch = fb->pitches[0]; - frame_info->cpp = fb->format->cpp[0]; - vkms_plane_state->pixel_read = get_pixel_conversion_function(fmt); + frame_info->rotation = new_state->rotation; + + vkms_plane_state->pixel_read_line = get_pixel_read_line_function(fmt); } static int vkms_plane_atomic_check(struct drm_plane *plane, @@ -198,12 +187,12 @@ static const struct drm_plane_helper_funcs vkms_plane_helper_funcs = { }; struct vkms_plane *vkms_plane_init(struct vkms_device *vkmsdev, - enum drm_plane_type type, int index) + enum drm_plane_type type) { struct drm_device *dev = &vkmsdev->drm; struct vkms_plane *plane; - plane = drmm_universal_plane_alloc(dev, struct vkms_plane, base, 1 << index, + plane = drmm_universal_plane_alloc(dev, struct vkms_plane, base, 0, &vkms_plane_funcs, vkms_formats, ARRAY_SIZE(vkms_formats), NULL, type, NULL); diff --git a/drivers/gpu/drm/vkms/vkms_writeback.c b/drivers/gpu/drm/vkms/vkms_writeback.c index 999d5c01ea81..79918b44fedd 100644 --- a/drivers/gpu/drm/vkms/vkms_writeback.c +++ b/drivers/gpu/drm/vkms/vkms_writeback.c @@ -149,11 +149,6 @@ static void vkms_wb_atomic_commit(struct drm_connector *conn, crtc_state->active_writeback = active_wb; crtc_state->wb_pending = true; spin_unlock_irq(&output->composer_lock); - - wb_frame_info->offset = fb->offsets[0]; - wb_frame_info->pitch = fb->pitches[0]; - wb_frame_info->cpp = fb->format->cpp[0]; - drm_writeback_queue_job(wb_conn, connector_state); active_wb->pixel_write = get_pixel_write_function(wb_format); drm_rect_init(&wb_frame_info->src, 0, 0, crtc_width, crtc_height); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 2c46897876dd..1699236fca5a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -35,7 +35,7 @@ #include "vmwgfx_vkms.h" #include "ttm_object.h" -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem_ttm_helper.h> @@ -1634,7 +1634,6 @@ static const struct drm_driver driver = { .fops = &vmwgfx_driver_fops, .name = VMWGFX_DRIVER_NAME, .desc = VMWGFX_DRIVER_DESC, - .date = VMWGFX_DRIVER_DATE, .major = VMWGFX_DRIVER_MAJOR, .minor = VMWGFX_DRIVER_MINOR, .patchlevel 
= VMWGFX_DRIVER_PATCHLEVEL diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index b21831ef214a..5275ef632d4b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -57,7 +57,6 @@ #define VMWGFX_DRIVER_NAME "vmwgfx" -#define VMWGFX_DRIVER_DATE "20211206" #define VMWGFX_DRIVER_MAJOR 2 #define VMWGFX_DRIVER_MINOR 20 #define VMWGFX_DRIVER_PATCHLEVEL 0 diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 39949e0a493f..f0b429525467 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -479,7 +479,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) } drm_connector_helper_add(connector, &vmw_ldu_connector_helper_funcs); - connector->status = vmw_du_connector_detect(connector, true); ret = drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs, DRM_MODE_ENCODER_VIRTUAL, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 0f4bfd98480a..32029d80b72b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -868,7 +868,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) } drm_connector_helper_add(connector, &vmw_sou_connector_helper_funcs); - connector->status = vmw_du_connector_detect(connector, true); ret = drm_encoder_init(dev, encoder, &vmw_screen_object_encoder_funcs, DRM_MODE_ENCODER_VIRTUAL, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index 82d18b88f4a7..114a75069e1c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1593,7 +1593,6 @@ static int vmw_stdu_init(struct vmw_private *dev_priv, unsigned unit) } drm_connector_helper_add(connector, &vmw_stdu_connector_helper_funcs); - connector->status = vmw_du_connector_detect(connector, false); ret = drm_encoder_init(dev, encoder, &vmw_stdu_encoder_funcs, DRM_MODE_ENCODER_VIRTUAL, NULL); diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index bc7a04ce69fd..7730e0596299 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -101,6 +101,7 @@ xe-y += xe_bb.o \ xe_trace.o \ xe_trace_bo.o \ xe_trace_guc.o \ + xe_trace_lrc.o \ xe_ttm_sys_mgr.o \ xe_ttm_stolen_mgr.o \ xe_ttm_vram_mgr.o \ @@ -110,6 +111,7 @@ xe-y += xe_bb.o \ xe_vm.o \ xe_vram.o \ xe_vram_freq.o \ + xe_vsec.o \ xe_wait_user_fence.o \ xe_wa.o \ xe_wopcm.o @@ -124,7 +126,8 @@ xe-y += \ xe_gt_sriov_vf.o \ xe_guc_relay.o \ xe_memirq.o \ - xe_sriov.o + xe_sriov.o \ + xe_sriov_vf.o xe-$(CONFIG_PCI_IOV) += \ xe_gt_sriov_pf.o \ diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h index b54fe40fc5a9..fee385532fb0 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h @@ -134,6 +134,8 @@ enum xe_guc_action { XE_GUC_ACTION_DEREGISTER_CONTEXT = 0x4503, XE_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER = 0x4505, XE_GUC_ACTION_DEREGISTER_COMMAND_TRANSPORT_BUFFER = 0x4506, + XE_GUC_ACTION_REGISTER_G2G = 0x4507, + XE_GUC_ACTION_DEREGISTER_G2G = 0x4508, XE_GUC_ACTION_DEREGISTER_CONTEXT_DONE = 0x4600, XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC = 0x4601, XE_GUC_ACTION_CLIENT_SOFT_RESET = 0x5507, @@ -218,4 +220,22 @@ enum xe_guc_tlb_inval_mode { XE_GUC_TLB_INVAL_MODE_LITE = 0x1, }; +/* + * GuC to GuC communication (de-)registration fields: + */ +enum xe_guc_g2g_type { + XE_G2G_TYPE_IN = 0x0, + 
XE_G2G_TYPE_OUT, + XE_G2G_TYPE_LIMIT, +}; + +#define XE_G2G_REGISTER_DEVICE REG_GENMASK(16, 16) +#define XE_G2G_REGISTER_TILE REG_GENMASK(15, 12) +#define XE_G2G_REGISTER_TYPE REG_GENMASK(11, 8) +#define XE_G2G_REGISTER_SIZE REG_GENMASK(7, 0) + +#define XE_G2G_DEREGISTER_DEVICE REG_GENMASK(16, 16) +#define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12) +#define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8) + #endif diff --git a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h index b6a1852749dd..0b28659d94e9 100644 --- a/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_actions_sriov_abi.h @@ -502,6 +502,44 @@ #define VF2GUC_VF_RESET_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 /** + * DOC: VF2GUC_NOTIFY_RESFIX_DONE + * + * This action is used by VF to notify the GuC that the VF KMD has completed + * post-migration recovery steps. + * + * This message must be sent as `MMIO HXG Message`_. + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_HOST_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_REQUEST_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:16 | DATA0 = MBZ | + * | +-------+--------------------------------------------------------------+ + * | | 15:0 | ACTION = _`GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE` = 0x5508 | + * +---+-------+--------------------------------------------------------------+ + * + * +---+-------+--------------------------------------------------------------+ + * | | Bits | Description | + * +===+=======+==============================================================+ + * | 0 | 31 | ORIGIN = GUC_HXG_ORIGIN_GUC_ | + * | +-------+--------------------------------------------------------------+ + * | | 30:28 | TYPE = GUC_HXG_TYPE_RESPONSE_SUCCESS_ | + * | +-------+--------------------------------------------------------------+ + * | | 27:0 | DATA0 = MBZ | + * +---+-------+--------------------------------------------------------------+ + */ +#define GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE 0x5508u + +#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_LEN GUC_HXG_REQUEST_MSG_MIN_LEN +#define VF2GUC_NOTIFY_RESFIX_DONE_REQUEST_MSG_0_MBZ GUC_HXG_REQUEST_MSG_0_DATA0 + +#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_LEN GUC_HXG_RESPONSE_MSG_MIN_LEN +#define VF2GUC_NOTIFY_RESFIX_DONE_RESPONSE_MSG_0_MBZ GUC_HXG_RESPONSE_MSG_0_DATA0 + +/** * DOC: VF2GUC_QUERY_SINGLE_KLV * * This action is used by VF to query value of the single KLV data. diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h index 37606cf8cc5e..7dcb118e3d9f 100644 --- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h +++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h @@ -291,6 +291,14 @@ enum { * * :0: (default) * :1-65535: number of contexts (Gen12) + * + * _`GUC_KLV_VF_CFG_SCHED_PRIORITY` : 0x8A0C + * This config controls VF’s scheduling priority. 
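For reference, the XE_G2G_REGISTER_* masks introduced earlier in this hunk describe a single 32-bit action parameter. The sketch below packs such a word with plain shifts; the bit layout is taken only from those mask definitions, the field values in main() are arbitrary illustrative numbers, and the units of the size field are not specified here.

    #include <assert.h>
    #include <stdint.h>

    /* Bit layout per the masks above:
     * bit 16 = device, bits 15:12 = tile, bits 11:8 = type, bits 7:0 = size. */
    static uint32_t g2g_register_word(uint32_t dev, uint32_t tile,
                                      uint32_t type, uint32_t size)
    {
            return ((dev  & 0x1) << 16) |
                   ((tile & 0xf) << 12) |
                   ((type & 0xf) <<  8) |
                    (size & 0xff);
    }

    int main(void)
    {
            /* device 0, tile 1, XE_G2G_TYPE_OUT (= 1), size field 0x20 */
            assert(g2g_register_word(0, 1, 1, 0x20) == 0x00001120);
            return 0;
    }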
+ * + * :0: LOW = schedule VF only if it has active work (default) + * :1: NORMAL = schedule VF always, irrespective of whether it has work or not + * :2: HIGH = schedule VF in the next time-slice after current active + * time-slice completes if it has active work */ #define GUC_KLV_VF_CFG_GGTT_START_KEY 0x0001 @@ -343,6 +351,12 @@ enum { #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY 0x8a0b #define GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_LEN 1u +#define GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY 0x8a0c +#define GUC_KLV_VF_CFG_SCHED_PRIORITY_LEN 1u +#define GUC_SCHED_PRIORITY_LOW 0u +#define GUC_SCHED_PRIORITY_NORMAL 1u +#define GUC_SCHED_PRIORITY_HIGH 2u + /* * Workaround keys: */ diff --git a/drivers/gpu/drm/xe/display/ext/i915_irq.c b/drivers/gpu/drm/xe/display/ext/i915_irq.c index a7dbc6554d69..ac4cda2d81c7 100644 --- a/drivers/gpu/drm/xe/display/ext/i915_irq.c +++ b/drivers/gpu/drm/xe/display/ext/i915_irq.c @@ -53,18 +53,7 @@ void gen2_irq_init(struct intel_uncore *uncore, struct i915_irq_regs regs, bool intel_irqs_enabled(struct xe_device *xe) { - /* - * XXX: i915 has a racy handling of the irq.enabled, since it doesn't - * lock its transitions. Because of that, the irq.enabled sometimes - * is not read with the irq.lock in place. - * However, the most critical cases like vblank and page flips are - * properly using the locks. - * We cannot take the lock in here or run any kind of assert because - * of i915 inconsistency. - * But at this point the xe irq is better protected against races, - * although the full solution would be protecting the i915 side. - */ - return xe->irq.enabled; + return atomic_read(&xe->irq.enabled); } void intel_synchronize_irq(struct xe_device *xe) diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c index 9f54fad0f1c0..b463f5bd4eed 100644 --- a/drivers/gpu/drm/xe/display/intel_bo.c +++ b/drivers/gpu/drm/xe/display/intel_bo.c @@ -40,31 +40,8 @@ int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, int size) { struct xe_bo *bo = gem_to_xe_bo(obj); - struct ttm_bo_kmap_obj map; - void *src; - bool is_iomem; - int ret; - ret = xe_bo_lock(bo, true); - if (ret) - return ret; - - ret = ttm_bo_kmap(&bo->ttm, offset >> PAGE_SHIFT, 1, &map); - if (ret) - goto out_unlock; - - offset &= ~PAGE_MASK; - src = ttm_kmap_obj_virtual(&map, &is_iomem); - src += offset; - if (is_iomem) - memcpy_fromio(dst, (void __iomem *)src, size); - else - memcpy(dst, src, size); - - ttm_bo_kunmap(&map); -out_unlock: - xe_bo_unlock(bo); - return ret; + return xe_bo_read(bo, offset, dst, size); } struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 761510ae0690..9fa51b84737c 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -161,7 +161,7 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb, } vma->dpt = dpt; - vma->node = dpt->ggtt_node; + vma->node = dpt->ggtt_node[tile0->id]; return 0; } @@ -213,8 +213,8 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb, if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) align = max_t(u32, align, SZ_64K); - if (bo->ggtt_node && view->type == I915_GTT_VIEW_NORMAL) { - vma->node = bo->ggtt_node; + if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) { + vma->node = bo->ggtt_node[ggtt->tile->id]; } else if 
(view->type == I915_GTT_VIEW_NORMAL) { u32 x, size = bo->ttm.base.size; @@ -345,10 +345,12 @@ err: static void __xe_unpin_fb_vma(struct i915_vma *vma) { + u8 tile_id = vma->node->ggtt->tile->id; + if (vma->dpt) xe_bo_unpin_map_no_vm(vma->dpt); - else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node) || - vma->bo->ggtt_node->base.start != vma->node->base.start) + else if (!xe_ggtt_node_allocated(vma->bo->ggtt_node[tile_id]) || + vma->bo->ggtt_node[tile_id]->base.start != vma->node->base.start) xe_ggtt_node_remove(vma->node, false); ttm_bo_reserve(&vma->bo->ttm, false, false, NULL); diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index 0c9e4b2fafab..162f18e975da 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -445,6 +445,8 @@ #define SAMPLER_MODE XE_REG_MCR(0xe18c, XE_REG_OPTION_MASKED) #define ENABLE_SMALLPL REG_BIT(15) +#define SMP_WAIT_FETCH_MERGING_COUNTER REG_GENMASK(11, 10) +#define SMP_FORCE_128B_OVERFETCH REG_FIELD_PREP(SMP_WAIT_FETCH_MERGING_COUNTER, 1) #define SC_DISABLE_POWER_OPTIMIZATION_EBB REG_BIT(9) #define SAMPLER_ENABLE_HEADLESS_MSG REG_BIT(5) #define INDIRECT_STATE_BASE_ADDR_OVERRIDE REG_BIT(0) diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h index a9b0091cb7ee..a49561e9f3c3 100644 --- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -41,14 +41,6 @@ #define OAG_OABUFFER XE_REG(0xdb08) #define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) -#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) -#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) -#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) -#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) -#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) -#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) -#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) -#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ #define OAG_OACONTROL XE_REG(0xdaf4) @@ -63,6 +55,7 @@ #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) #define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13) +#define OAG_OA_DEBUG_BUF_SIZE_SELECT REG_BIT(12) #define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8) #define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7) #define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h new file mode 100644 index 000000000000..f45abcd96ba8 --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_pmt.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ +#ifndef _XE_PMT_H_ +#define _XE_PMT_H_ + +#define SOC_BASE 0x280000 + +#define BMG_PMT_BASE_OFFSET 0xDB000 +#define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET) + +#define BMG_TELEMETRY_BASE_OFFSET 0xE0000 +#define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET) + +#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08) +#define SG_REMAP_BITS REG_GENMASK(31, 24) + +#endif diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c index 3e0ae40ebbd2..c9ec7a313c6b 100644 --- a/drivers/gpu/drm/xe/tests/xe_bo.c +++ b/drivers/gpu/drm/xe/tests/xe_bo.c @@ -49,6 +49,13 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo, KUNIT_FAIL(test, 
"Failed to submit bo clear.\n"); return PTR_ERR(fence); } + + if (dma_fence_wait_timeout(fence, false, 5 * HZ) <= 0) { + dma_fence_put(fence); + KUNIT_FAIL(test, "Timeout while clearing bo.\n"); + return -ETIME; + } + dma_fence_put(fence); } diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c index 1a192a2a941b..d5fe0ea889ad 100644 --- a/drivers/gpu/drm/xe/tests/xe_migrate.c +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c @@ -83,7 +83,8 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo, bo->size, ttm_bo_type_kernel, region | - XE_BO_FLAG_NEEDS_CPU_ACCESS); + XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); if (IS_ERR(remote)) { KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n", str, remote); @@ -224,8 +225,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test) XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_PINNED); if (IS_ERR(tiny)) { - KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n", - PTR_ERR(pt)); + KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n", + PTR_ERR(tiny)); goto free_pt; } @@ -642,7 +643,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS); + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); if (IS_ERR(sys_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -666,7 +669,8 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); if (IS_ERR(ccs_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", @@ -690,7 +694,8 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M, DRM_XE_GEM_CPU_CACHING_WC, - bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS); + bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | + XE_BO_FLAG_PINNED); if (IS_ERR(vram_bo)) { KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n", PTR_ERR(vram_bo)); diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h index 04d6b95c6d87..68fe70ce2be3 100644 --- a/drivers/gpu/drm/xe/xe_assert.h +++ b/drivers/gpu/drm/xe/xe_assert.h @@ -14,7 +14,7 @@ #include "xe_step.h" /** - * DOC: Xe ASSERTs + * DOC: Xe Asserts * * While Xe driver aims to be simpler than legacy i915 driver it is still * complex enough that some changes introduced while adding new functionality @@ -103,7 +103,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. + * See `Xe Asserts`_ for general usage guidelines. */ #define xe_assert(xe, condition) xe_assert_msg((xe), condition, "") #define xe_assert_msg(xe, condition, msg, arg...) ({ \ @@ -138,7 +138,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. + * See `Xe Asserts`_ for general usage guidelines. */ #define xe_tile_assert(tile, condition) xe_tile_assert_msg((tile), condition, "") #define xe_tile_assert_msg(tile, condition, msg, arg...) ({ \ @@ -162,7 +162,7 @@ * (&CONFIG_DRM_XE_DEBUG must be enabled) and cannot be used in expressions * or as a condition. * - * See `Xe ASSERTs`_ for general usage guidelines. 
+ * See `Xe Asserts`_ for general usage guidelines. */ #define xe_gt_assert(gt, condition) xe_gt_assert_msg((gt), condition, "") #define xe_gt_assert_msg(gt, condition, msg, arg...) ({ \ diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index ae6b337cdc54..283cd0294570 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -162,6 +162,15 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo, } } +static bool force_contiguous(u32 bo_flags) +{ + /* + * For eviction / restore on suspend / resume objects pinned in VRAM + * must be contiguous, also only contiguous BOs support xe_bo_vmap. + */ + return bo_flags & (XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT); +} + static void add_vram(struct xe_device *xe, struct xe_bo *bo, struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c) { @@ -175,12 +184,7 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo, xe_assert(xe, vram && vram->usable_size); io_size = vram->io_size; - /* - * For eviction / restore on suspend / resume objects - * pinned in VRAM must be contiguous - */ - if (bo_flags & (XE_BO_FLAG_PINNED | - XE_BO_FLAG_GGTT)) + if (force_contiguous(bo_flags)) place.flags |= TTM_PL_FLAG_CONTIGUOUS; if (io_size < vram->usable_size) { @@ -212,8 +216,7 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo, bo->placements[*c] = (struct ttm_place) { .mem_type = XE_PL_STOLEN, - .flags = bo_flags & (XE_BO_FLAG_PINNED | - XE_BO_FLAG_GGTT) ? + .flags = force_contiguous(bo_flags) ? TTM_PL_FLAG_CONTIGUOUS : 0, }; *c += 1; @@ -442,6 +445,14 @@ static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt) kfree(tt); } +static bool xe_ttm_resource_visible(struct ttm_resource *mem) +{ + struct xe_ttm_vram_mgr_resource *vres = + to_xe_ttm_vram_mgr_resource(mem); + + return vres->used_visible_size == mem->size; +} + static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem) { @@ -453,11 +464,9 @@ static int xe_ttm_io_mem_reserve(struct ttm_device *bdev, return 0; case XE_PL_VRAM0: case XE_PL_VRAM1: { - struct xe_ttm_vram_mgr_resource *vres = - to_xe_ttm_vram_mgr_resource(mem); struct xe_mem_region *vram = res_to_mem_region(mem); - if (vres->used_visible_size < mem->size) + if (!xe_ttm_resource_visible(mem)) return -EINVAL; mem->bus.offset = mem->start << PAGE_SHIFT; @@ -876,6 +885,7 @@ int xe_bo_evict_pinned(struct xe_bo *bo) }; struct ttm_operation_ctx ctx = { .interruptible = false, + .gfp_retry_mayfail = true, }; struct ttm_resource *new_mem; int ret; @@ -937,6 +947,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo) { struct ttm_operation_ctx ctx = { .interruptible = false, + .gfp_retry_mayfail = false, }; struct ttm_resource *new_mem; struct ttm_place *place = &bo->placements[0]; @@ -1106,7 +1117,8 @@ static void xe_ttm_bo_purge(struct ttm_buffer_object *ttm_bo, struct ttm_operati static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) { struct ttm_operation_ctx ctx = { - .interruptible = false + .interruptible = false, + .gfp_retry_mayfail = false, }; if (ttm_bo->ttm) { @@ -1118,6 +1130,52 @@ static void xe_ttm_bo_swap_notify(struct ttm_buffer_object *ttm_bo) } } +static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo, + unsigned long offset, void *buf, int len, + int write) +{ + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct iosys_map vmap; + struct xe_res_cursor cursor; + struct xe_mem_region *vram; + int bytes_left = len; + + xe_bo_assert_held(bo); + 
xe_device_assert_mem_access(xe); + + if (!mem_type_is_vram(ttm_bo->resource->mem_type)) + return -EIO; + + /* FIXME: Use GPU for non-visible VRAM */ + if (!xe_ttm_resource_visible(ttm_bo->resource)) + return -EIO; + + vram = res_to_mem_region(ttm_bo->resource); + xe_res_first(ttm_bo->resource, offset & PAGE_MASK, + bo->size - (offset & PAGE_MASK), &cursor); + + do { + unsigned long page_offset = (offset & ~PAGE_MASK); + int byte_count = min((int)(PAGE_SIZE - page_offset), bytes_left); + + iosys_map_set_vaddr_iomem(&vmap, (u8 __iomem *)vram->mapping + + cursor.start); + if (write) + xe_map_memcpy_to(xe, &vmap, page_offset, buf, byte_count); + else + xe_map_memcpy_from(xe, buf, &vmap, page_offset, byte_count); + + buf += byte_count; + offset += byte_count; + bytes_left -= byte_count; + if (bytes_left) + xe_res_next(&cursor, PAGE_SIZE); + } while (bytes_left); + + return len; +} + const struct ttm_device_funcs xe_ttm_funcs = { .ttm_tt_create = xe_ttm_tt_create, .ttm_tt_populate = xe_ttm_tt_populate, @@ -1127,6 +1185,7 @@ const struct ttm_device_funcs xe_ttm_funcs = { .move = xe_bo_move, .io_mem_reserve = xe_ttm_io_mem_reserve, .io_mem_pfn = xe_ttm_io_mem_pfn, + .access_memory = xe_ttm_access_memory, .release_notify = xe_ttm_bo_release_notify, .eviction_valuable = ttm_bo_eviction_valuable, .delete_mem_notify = xe_ttm_bo_delete_mem_notify, @@ -1137,6 +1196,8 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) { struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev); + struct xe_tile *tile; + u8 id; if (bo->ttm.base.import_attach) drm_prime_gem_destroy(&bo->ttm.base, NULL); @@ -1144,8 +1205,9 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo) xe_assert(xe, list_empty(&ttm_bo->base.gpuva.list)); - if (bo->ggtt_node && bo->ggtt_node->base.size) - xe_ggtt_remove_bo(bo->tile->mem.ggtt, bo); + for_each_tile(tile, xe, id) + if (bo->ggtt_node[id] && bo->ggtt_node[id]->base.size) + xe_ggtt_remove_bo(tile->mem.ggtt, bo); #ifdef CONFIG_PROC_FS if (bo->client) @@ -1243,11 +1305,50 @@ out: return ret; } +static int xe_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct ttm_buffer_object *ttm_bo = vma->vm_private_data; + struct xe_bo *bo = ttm_to_xe_bo(ttm_bo); + struct xe_device *xe = xe_bo_device(bo); + int ret; + + xe_pm_runtime_get(xe); + ret = ttm_bo_vm_access(vma, addr, buf, len, write); + xe_pm_runtime_put(xe); + + return ret; +} + +/** + * xe_bo_read() - Read from an xe_bo + * @bo: The buffer object to read from. + * @offset: The byte offset to start reading from. + * @dst: Location to store the read. + * @size: Size in bytes for the read. + * + * Read @size bytes from the @bo, starting from @offset, storing into @dst. + * + * Return: Zero on success, or negative error. 
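The copy loop in xe_ttm_access_memory() above splits the access at page boundaries so the first chunk honours the offset within its page. A standalone sketch of just that arithmetic, with no iosys_map or VRAM mapping involved:

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    static void walk_chunks(unsigned long offset, int len)
    {
            int bytes_left = len;

            do {
                    unsigned long page_offset = offset & ~PAGE_MASK;
                    int room = (int)(PAGE_SIZE - page_offset);
                    int byte_count = room < bytes_left ? room : bytes_left;

                    printf("copy %d bytes at page offset %lu\n",
                           byte_count, page_offset);

                    offset += byte_count;
                    bytes_left -= byte_count;
            } while (bytes_left);
    }

    int main(void)
    {
            /* 96 bytes left in the first page, then 204 in the next one. */
            walk_chunks(4000, 300);
            return 0;
    }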
+ */ +int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size) +{ + int ret; + + ret = ttm_bo_access(&bo->ttm, offset, dst, size, 0); + if (ret >= 0 && ret != size) + ret = -EIO; + else if (ret == size) + ret = 0; + + return ret; +} + static const struct vm_operations_struct xe_gem_vm_ops = { .fault = xe_gem_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, - .access = ttm_bo_vm_access + .access = xe_bo_vm_access, }; static const struct drm_gem_object_funcs xe_gem_object_funcs = { @@ -1301,6 +1402,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; struct ttm_placement *placement; uint32_t alignment; @@ -1315,6 +1417,10 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo, return ERR_PTR(-EINVAL); } + /* XE_BO_FLAG_GGTTx requires XE_BO_FLAG_GGTT also be set */ + if ((flags & XE_BO_FLAG_GGTT_ALL) && !(flags & XE_BO_FLAG_GGTT)) + return ERR_PTR(-EINVAL); + if (flags & (XE_BO_FLAG_VRAM_MASK | XE_BO_FLAG_STOLEN) && !(flags & XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE) && ((xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) || @@ -1505,19 +1611,29 @@ __xe_bo_create_locked(struct xe_device *xe, bo->vm = vm; if (bo->flags & XE_BO_FLAG_GGTT) { - if (!tile && flags & XE_BO_FLAG_STOLEN) - tile = xe_device_get_root_tile(xe); + struct xe_tile *t; + u8 id; - xe_assert(xe, tile); + if (!(bo->flags & XE_BO_FLAG_GGTT_ALL)) { + if (!tile && flags & XE_BO_FLAG_STOLEN) + tile = xe_device_get_root_tile(xe); - if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { - err = xe_ggtt_insert_bo_at(tile->mem.ggtt, bo, - start + bo->size, U64_MAX); - } else { - err = xe_ggtt_insert_bo(tile->mem.ggtt, bo); + xe_assert(xe, tile); + } + + for_each_tile(t, xe, id) { + if (t != tile && !(bo->flags & XE_BO_FLAG_GGTTx(t))) + continue; + + if (flags & XE_BO_FLAG_FIXED_PLACEMENT) { + err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo, + start + bo->size, U64_MAX); + } else { + err = xe_ggtt_insert_bo(t->mem.ggtt, bo); + } + if (err) + goto err_unlock_put_bo; } - if (err) - goto err_unlock_put_bo; } return bo; @@ -1900,6 +2016,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; if (vm) { @@ -1910,6 +2027,7 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict) ctx.resv = xe_vm_resv(vm); } + trace_xe_bo_validate(bo); return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx); } @@ -1961,13 +2079,15 @@ dma_addr_t xe_bo_addr(struct xe_bo *bo, u64 offset, size_t page_size) int xe_bo_vmap(struct xe_bo *bo) { + struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev); void *virtual; bool is_iomem; int ret; xe_bo_assert_held(bo); - if (!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) + if (drm_WARN_ON(&xe->drm, !(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS) || + !force_contiguous(bo->flags))) return -EINVAL; if (!iosys_map_is_null(&bo->vmap)) @@ -2243,6 +2363,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type) struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, + .gfp_retry_mayfail = true, }; struct ttm_placement placement; struct ttm_place requested; @@ -2293,6 +2414,7 @@ int xe_bo_evict(struct xe_bo *bo, bool force_alloc) .interruptible = false, .no_wait_gpu = false, .force_alloc = force_alloc, + .gfp_retry_mayfail = true, }; struct ttm_placement placement; int ret; @@ -2372,14 +2494,18 @@ void 
xe_bo_put_commit(struct llist_head *deferred) void xe_bo_put(struct xe_bo *bo) { + struct xe_tile *tile; + u8 id; + might_sleep(); if (bo) { #ifdef CONFIG_PROC_FS if (bo->client) might_lock(&bo->client->bos_lock); #endif - if (bo->ggtt_node && bo->ggtt_node->ggtt) - might_lock(&bo->ggtt_node->ggtt->lock); + for_each_tile(tile, xe_bo_device(bo), id) + if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt) + might_lock(&bo->ggtt_node[id]->ggtt->lock); drm_gem_object_put(&bo->ttm.base); } } diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h index 7fa44a0138b0..d9386ab03140 100644 --- a/drivers/gpu/drm/xe/xe_bo.h +++ b/drivers/gpu/drm/xe/xe_bo.h @@ -39,10 +39,22 @@ #define XE_BO_FLAG_NEEDS_64K BIT(15) #define XE_BO_FLAG_NEEDS_2M BIT(16) #define XE_BO_FLAG_GGTT_INVALIDATE BIT(17) +#define XE_BO_FLAG_GGTT0 BIT(18) +#define XE_BO_FLAG_GGTT1 BIT(19) +#define XE_BO_FLAG_GGTT2 BIT(20) +#define XE_BO_FLAG_GGTT3 BIT(21) +#define XE_BO_FLAG_GGTT_ALL (XE_BO_FLAG_GGTT0 | \ + XE_BO_FLAG_GGTT1 | \ + XE_BO_FLAG_GGTT2 | \ + XE_BO_FLAG_GGTT3) + /* this one is trigger internally only */ #define XE_BO_FLAG_INTERNAL_TEST BIT(30) #define XE_BO_FLAG_INTERNAL_64K BIT(31) +#define XE_BO_FLAG_GGTTx(tile) \ + (XE_BO_FLAG_GGTT0 << (tile)->id) + #define XE_PTE_SHIFT 12 #define XE_PAGE_SIZE (1 << XE_PTE_SHIFT) #define XE_PTE_MASK (XE_PAGE_SIZE - 1) @@ -194,18 +206,29 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size) } static inline u32 -xe_bo_ggtt_addr(struct xe_bo *bo) +__xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id) { - if (XE_WARN_ON(!bo->ggtt_node)) + struct xe_ggtt_node *ggtt_node = bo->ggtt_node[tile_id]; + + if (XE_WARN_ON(!ggtt_node)) return 0; - XE_WARN_ON(bo->ggtt_node->base.size > bo->size); - XE_WARN_ON(bo->ggtt_node->base.start + bo->ggtt_node->base.size > (1ull << 32)); - return bo->ggtt_node->base.start; + XE_WARN_ON(ggtt_node->base.size > bo->size); + XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32)); + return ggtt_node->base.start; +} + +static inline u32 +xe_bo_ggtt_addr(struct xe_bo *bo) +{ + xe_assert(xe_bo_device(bo), bo->tile); + + return __xe_bo_ggtt_addr(bo, bo->tile->id); } int xe_bo_vmap(struct xe_bo *bo); void xe_bo_vunmap(struct xe_bo *bo); +int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size); bool mem_type_is_vram(u32 mem_type); bool xe_bo_is_vram(struct xe_bo *bo); diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c index 8fb2be061003..6a40eedd9db1 100644 --- a/drivers/gpu/drm/xe/xe_bo_evict.c +++ b/drivers/gpu/drm/xe/xe_bo_evict.c @@ -152,11 +152,17 @@ int xe_bo_restore_kernel(struct xe_device *xe) } if (bo->flags & XE_BO_FLAG_GGTT) { - struct xe_tile *tile = bo->tile; + struct xe_tile *tile; + u8 id; - mutex_lock(&tile->mem.ggtt->lock); - xe_ggtt_map_bo(tile->mem.ggtt, bo); - mutex_unlock(&tile->mem.ggtt->lock); + for_each_tile(tile, xe, id) { + if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile))) + continue; + + mutex_lock(&tile->mem.ggtt->lock); + xe_ggtt_map_bo(tile->mem.ggtt, bo); + mutex_unlock(&tile->mem.ggtt->lock); + } } /* diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 13c6d8a69e91..46dc9e4e3e46 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -10,9 +10,9 @@ #include <drm/ttm/ttm_bo.h> #include <drm/ttm/ttm_device.h> -#include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_placement.h> +#include "xe_device_types.h" #include "xe_ggtt_types.h" struct xe_device; @@ -39,8 +39,8 @@ struct xe_bo { struct ttm_place 
placements[XE_BO_MAX_PLACEMENTS]; /** @placement: current placement for this BO */ struct ttm_placement placement; - /** @ggtt_node: GGTT node if this BO is mapped in the GGTT */ - struct xe_ggtt_node *ggtt_node; + /** @ggtt_node: Array of GGTT nodes if this BO is mapped in the GGTTs */ + struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE]; /** @vmap: iosys map of this buffer */ struct iosys_map vmap; /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */ diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index f8947e7e917e..71636e80b71d 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -30,30 +30,39 @@ /** * DOC: Xe device coredump * - * Devices overview: * Xe uses dev_coredump infrastructure for exposing the crash errors in a - * standardized way. - * devcoredump exposes a temporary device under /sys/class/devcoredump/ - * which is linked with our card device directly. - * The core dump can be accessed either from - * /sys/class/drm/card<n>/device/devcoredump/ or from - * /sys/class/devcoredump/devcd<m> where - * /sys/class/devcoredump/devcd<m>/failing_device is a link to - * /sys/class/drm/card<n>/device/. + * standardized way. Once a crash occurs, devcoredump exposes a temporary + * node under ``/sys/class/devcoredump/devcd<m>/``. The same node is also + * accessible in ``/sys/class/drm/card<n>/device/devcoredump/``. The + * ``failing_device`` symlink points to the device that crashed and created the + * coredump. * - * Snapshot at hang: - * The 'data' file is printed with a drm_printer pointer at devcoredump read - * time. For this reason, we need to take snapshots from when the hang has - * happened, and not only when the user is reading the file. Otherwise the - * information is outdated since the resets might have happened in between. + * The following characteristics are observed by xe when creating a device + * coredump: * - * 'First' failure snapshot: - * In general, the first hang is the most critical one since the following hangs - * can be a consequence of the initial hang. For this reason we only take the - * snapshot of the 'first' failure and ignore subsequent calls of this function, - * at least while the coredump device is alive. Dev_coredump has a delayed work - * queue that will eventually delete the device and free all the dump - * information. + * **Snapshot at hang**: + * The 'data' file contains a snapshot of the HW and driver states at the time + * the hang happened. Due to the driver recovering from resets/crashes, it may + * not correspond to the state of the system when the file is read by + * userspace. + * + * **Coredump release**: + * After a coredump is generated, it stays in kernel memory until released by + * userpace by writing anything to it, or after an internal timer expires. The + * exact timeout may vary and should not be relied upon. Example to release + * a coredump: + * + * .. code-block:: shell + * + * $ > /sys/class/drm/card0/device/devcoredump/data + * + * **First failure only**: + * In general, the first hang is the most critical one since the following + * hangs can be a consequence of the initial hang. For this reason a snapshot + * is taken only for the first failure. Until the devcoredump is released by + * userspace or kernel, all subsequent hangs do not override the snapshot nor + * create new ones. Devcoredump has a delayed work queue that will eventually + * delete the file node and free all the dump information. 
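The "first failure only" rule described above boils down to a capture flag guarded by a lock that is cleared only when the dump is released. A rough userspace sketch of that policy follows; it is illustrative only and not the devcoredump code itself.

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t dump_lock = PTHREAD_MUTEX_INITIALIZER;
    static bool captured;

    static void take_coredump(const char *reason)
    {
            pthread_mutex_lock(&dump_lock);
            if (captured) {
                    pthread_mutex_unlock(&dump_lock);
                    printf("skipped:  %s (first dump still held)\n", reason);
                    return;
            }
            captured = true;
            printf("captured: %s\n", reason);
            pthread_mutex_unlock(&dump_lock);
    }

    static void release_coredump(void)
    {
            pthread_mutex_lock(&dump_lock);
            captured = false;
            pthread_mutex_unlock(&dump_lock);
    }

    int main(void)
    {
            take_coredump("first hang");
            take_coredump("second hang");           /* ignored */
            release_coredump();
            take_coredump("hang after release");    /* captured again */
            return 0;
    }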
*/ #ifdef CONFIG_DEV_COREDUMP @@ -91,6 +100,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, p = drm_coredump_printer(&iter); drm_puts(&p, "**** Xe Device Coredump ****\n"); + drm_printf(&p, "Reason: %s\n", ss->reason); drm_puts(&p, "kernel: " UTS_RELEASE "\n"); drm_puts(&p, "module: " KBUILD_MODNAME "\n"); @@ -98,7 +108,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); ts = ktime_to_timespec64(ss->boot_time); drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec); - drm_printf(&p, "Process: %s\n", ss->process_name); + drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss->pid); xe_device_snapshot_print(xe, &p); drm_printf(&p, "\n**** GT #%d ****\n", ss->gt->info.id); @@ -130,6 +140,9 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) { int i; + kfree(ss->reason); + ss->reason = NULL; + xe_guc_log_snapshot_free(ss->guc.log); ss->guc.log = NULL; @@ -170,16 +183,24 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); - if (!ss->read.buffer) + mutex_lock(&coredump->lock); + + if (!ss->read.buffer) { + mutex_unlock(&coredump->lock); return -ENODEV; + } - if (offset >= ss->read.size) + if (offset >= ss->read.size) { + mutex_unlock(&coredump->lock); return 0; + } byte_copied = count < ss->read.size - offset ? count : ss->read.size - offset; memcpy(buffer, ss->read.buffer + offset, byte_copied); + mutex_unlock(&coredump->lock); + return byte_copied; } @@ -193,15 +214,18 @@ static void xe_devcoredump_free(void *data) cancel_work_sync(&coredump->snapshot.work); + mutex_lock(&coredump->lock); + xe_devcoredump_snapshot_free(&coredump->snapshot); kvfree(coredump->snapshot.read.buffer); /* To prevent stale data on next snapshot, clear everything */ memset(&coredump->snapshot, 0, sizeof(coredump->snapshot)); coredump->captured = false; - coredump->job = NULL; drm_info(&coredump_to_xe(coredump)->drm, "Xe device coredump has been deleted.\n"); + + mutex_unlock(&coredump->lock); } static void xe_devcoredump_deferred_snap_work(struct work_struct *work) @@ -244,10 +268,10 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) } static void devcoredump_snapshot(struct xe_devcoredump *coredump, + struct xe_exec_queue *q, struct xe_sched_job *job) { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; - struct xe_exec_queue *q = job->q; struct xe_guc *guc = exec_queue_to_guc(q); u32 adj_logical_mask = q->logical_mask; u32 width_mask = (0x1 << q->width) - 1; @@ -260,12 +284,14 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); - if (q->vm && q->vm->xef) + if (q->vm && q->vm->xef) { process_name = q->vm->xef->process_name; + ss->pid = q->vm->xef->pid; + } + strscpy(ss->process_name, process_name); ss->gt = q->gt; - coredump->job = job; INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); @@ -284,10 +310,11 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ss->guc.log = xe_guc_log_snapshot_capture(&guc->log, true); ss->guc.ct = xe_guc_ct_snapshot_capture(&guc->ct); ss->ge = xe_guc_exec_queue_snapshot_capture(q); - ss->job = xe_sched_job_snapshot_capture(job); + if (job) + ss->job = xe_sched_job_snapshot_capture(job); ss->vm = xe_vm_snapshot_capture(q->vm); - 
xe_engine_snapshot_capture_for_job(job); + xe_engine_snapshot_capture_for_queue(q); queue_work(system_unbound_wq, &ss->work); @@ -297,28 +324,42 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, /** * xe_devcoredump - Take the required snapshots and initialize coredump device. + * @q: The faulty xe_exec_queue, where the issue was detected. * @job: The faulty xe_sched_job, where the issue was detected. + * @fmt: Printf format + args to describe the reason for the core dump * * This function should be called at the crash time within the serialized * gt_reset. It is skipped if we still have the core dump device available * with the information of the 'first' snapshot. */ -void xe_devcoredump(struct xe_sched_job *job) +__printf(3, 4) +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...) { - struct xe_device *xe = gt_to_xe(job->q->gt); + struct xe_device *xe = gt_to_xe(q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; + va_list varg; + + mutex_lock(&coredump->lock); if (coredump->captured) { drm_dbg(&xe->drm, "Multiple hangs are occurring, but only the first snapshot was taken\n"); + mutex_unlock(&coredump->lock); return; } coredump->captured = true; - devcoredump_snapshot(coredump, job); + + va_start(varg, fmt); + coredump->snapshot.reason = kvasprintf(GFP_ATOMIC, fmt, varg); + va_end(varg); + + devcoredump_snapshot(coredump, q, job); drm_info(&xe->drm, "Xe device coredump has been created\n"); drm_info(&xe->drm, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", xe->drm.primary->index); + + mutex_unlock(&coredump->lock); } static void xe_driver_devcoredump_fini(void *arg) @@ -330,6 +371,18 @@ static void xe_driver_devcoredump_fini(void *arg) int xe_devcoredump_init(struct xe_device *xe) { + int err; + + err = drmm_mutex_init(&xe->drm, &xe->devcoredump.lock); + if (err) + return err; + + if (IS_ENABLED(CONFIG_LOCKDEP)) { + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&xe->devcoredump.lock); + fs_reclaim_release(GFP_KERNEL); + } + return devm_add_action_or_reset(xe->drm.dev, xe_driver_devcoredump_fini, &xe->drm); } diff --git a/drivers/gpu/drm/xe/xe_devcoredump.h b/drivers/gpu/drm/xe/xe_devcoredump.h index a4eebc285fc8..6a17e6d60102 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.h +++ b/drivers/gpu/drm/xe/xe_devcoredump.h @@ -10,13 +10,16 @@ struct drm_printer; struct xe_device; +struct xe_exec_queue; struct xe_sched_job; #ifdef CONFIG_DEV_COREDUMP -void xe_devcoredump(struct xe_sched_job *job); +void xe_devcoredump(struct xe_exec_queue *q, struct xe_sched_job *job, const char *fmt, ...); int xe_devcoredump_init(struct xe_device *xe); #else -static inline void xe_devcoredump(struct xe_sched_job *job) +static inline void xe_devcoredump(struct xe_exec_queue *q, + struct xe_sched_job *job, + const char *fmt, ...) 
{ } diff --git a/drivers/gpu/drm/xe/xe_devcoredump_types.h b/drivers/gpu/drm/xe/xe_devcoredump_types.h index 3703ddea1252..1a1d16a96b2d 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump_types.h +++ b/drivers/gpu/drm/xe/xe_devcoredump_types.h @@ -28,6 +28,10 @@ struct xe_devcoredump_snapshot { ktime_t boot_time; /** @process_name: Name of process that triggered this gpu hang */ char process_name[TASK_COMM_LEN]; + /** @pid: Process id of process that triggered this gpu hang */ + pid_t pid; + /** @reason: The reason the coredump was triggered */ + char *reason; /** @gt: Affected GT, used by forcewake for delayed capture */ struct xe_gt *gt; @@ -76,12 +80,12 @@ struct xe_devcoredump_snapshot { * for reading the information. */ struct xe_devcoredump { - /** @captured: The snapshot of the first hang has already been taken. */ + /** @lock: protects access to entire structure */ + struct mutex lock; + /** @captured: The snapshot of the first hang has already been taken */ bool captured; /** @snapshot: Snapshot is captured at time of the first crash */ struct xe_devcoredump_snapshot snapshot; - /** @job: Point to the faulting job */ - struct xe_sched_job *job; }; #endif diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 06d6db8b50f9..56d4ffb650da 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -44,6 +44,7 @@ #include "xe_memirq.h" #include "xe_mmio.h" #include "xe_module.h" +#include "xe_oa.h" #include "xe_observation.h" #include "xe_pat.h" #include "xe_pcode.h" @@ -55,6 +56,7 @@ #include "xe_ttm_sys_mgr.h" #include "xe_vm.h" #include "xe_vram.h" +#include "xe_vsec.h" #include "xe_wait_user_fence.h" #include "xe_wa.h" @@ -269,7 +271,6 @@ static struct drm_driver driver = { .fops = &xe_driver_fops, .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = DRIVER_MAJOR, .minor = DRIVER_MINOR, .patchlevel = DRIVER_PATCHLEVEL, @@ -366,6 +367,10 @@ struct xe_device *xe_device_create(struct pci_dev *pdev, goto err; } + err = drmm_mutex_init(&xe->drm, &xe->pmt.lock); + if (err) + goto err; + err = xe_display_create(xe); if (WARN_ON(err)) goto err; @@ -760,6 +765,8 @@ int xe_device_probe(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_sanitize_freq(gt); + xe_vsec_init(xe); + return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe); err_fini_display: diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index b9ea455d6f59..ace22e35e769 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -16,7 +16,7 @@ #include "xe_heci_gsc.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" -#include "xe_oa.h" +#include "xe_oa_types.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -42,8 +42,6 @@ struct xe_pat_ops; #define GRAPHICS_VERx100(xe) ((xe)->info.graphics_verx100) #define MEDIA_VERx100(xe) ((xe)->info.media_verx100) #define IS_DGFX(xe) ((xe)->info.is_dgfx) -#define HAS_HECI_GSCFI(xe) ((xe)->info.has_heci_gscfi) -#define HAS_HECI_CSCFI(xe) ((xe)->info.has_heci_cscfi) #define XE_VRAM_FLAGS_NEED64K BIT(0) @@ -296,14 +294,24 @@ struct xe_device { /** @info.va_bits: Maximum bits of a virtual address */ u8 va_bits; - /** @info.is_dgfx: is discrete device */ - u8 is_dgfx:1; - /** @info.has_asid: Has address space ID */ - u8 has_asid:1; + /* + * Keep all flags below alphabetically sorted + */ + /** @info.force_execlist: Forced execlist submission */ u8 force_execlist:1; + /** @info.has_asid: Has address 
space ID */ + u8 has_asid:1; + /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ + u8 has_atomic_enable_pte_bit:1; + /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ + u8 has_device_atomics_on_smem:1; /** @info.has_flat_ccs: Whether flat CCS metadata is used */ u8 has_flat_ccs:1; + /** @info.has_heci_cscfi: device has heci cscfi */ + u8 has_heci_cscfi:1; + /** @info.has_heci_gscfi: device has heci gscfi */ + u8 has_heci_gscfi:1; /** @info.has_llc: Device has a shared CPU+GPU last level cache */ u8 has_llc:1; /** @info.has_mmio_ext: Device has extra MMIO address range */ @@ -314,6 +322,8 @@ struct xe_device { u8 has_sriov:1; /** @info.has_usm: Device has unified shared memory support */ u8 has_usm:1; + /** @info.is_dgfx: is discrete device */ + u8 is_dgfx:1; /** * @info.probe_display: Probe display hardware. If set to * false, the driver will behave as if there is no display @@ -323,20 +333,12 @@ struct xe_device { * state the firmware or bootloader left it in. */ u8 probe_display:1; + /** @info.skip_guc_pc: Skip GuC based PM feature init */ + u8 skip_guc_pc:1; /** @info.skip_mtcfg: skip Multi-Tile configuration from MTCFG register */ u8 skip_mtcfg:1; /** @info.skip_pcode: skip access to PCODE uC */ u8 skip_pcode:1; - /** @info.has_heci_gscfi: device has heci gscfi */ - u8 has_heci_gscfi:1; - /** @info.has_heci_cscfi: device has heci cscfi */ - u8 has_heci_cscfi:1; - /** @info.skip_guc_pc: Skip GuC based PM feature init */ - u8 skip_guc_pc:1; - /** @info.has_atomic_enable_pte_bit: Device has atomic enable PTE bit */ - u8 has_atomic_enable_pte_bit:1; - /** @info.has_device_atomics_on_smem: Supports device atomics on SMEM */ - u8 has_device_atomics_on_smem:1; } info; /** @irq: device interrupt state */ @@ -345,7 +347,7 @@ struct xe_device { spinlock_t lock; /** @irq.enabled: interrupts enabled on this device */ - bool enabled; + atomic_t enabled; } irq; /** @ttm: ttm device */ @@ -374,6 +376,8 @@ struct xe_device { /** @sriov.pf: PF specific data */ struct xe_device_pf pf; + /** @sriov.vf: VF specific data */ + struct xe_device_vf vf; /** @sriov.wq: workqueue used by the virtualization workers */ struct workqueue_struct *wq; @@ -481,6 +485,12 @@ struct xe_device { struct mutex lock; } d3cold; + /** @pmt: Support the PMT driver callback interface */ + struct { + /** @pmt.lock: protect access for telemetry data */ + struct mutex lock; + } pmt; + /** * @pm_callback_task: Track the active task that is running in either * the runtime_suspend or runtime_resume callbacks. @@ -588,7 +598,7 @@ struct xe_file { /** @vm.xe: xarray to store VMs */ struct xarray xa; /** - * @vm.lock: Protects VM lookup + reference and removal a from + * @vm.lock: Protects VM lookup + reference and removal from * file xarray. Not an intended to be an outer lock which does * thing while being held. */ @@ -601,10 +611,15 @@ struct xe_file { struct xarray xa; /** * @exec_queue.lock: Protects exec queue lookup + reference and - * removal a frommfile xarray. Not an intended to be an outer - * lock which does thing while being held. + * removal from file xarray. Not intended to be an outer lock + * which does things while being held. */ struct mutex lock; + /** + * @exec_queue.pending_removal: items pending to be removed to + * synchronize GPU state update with ongoing query. 
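
The pending_removal counter declared just below is paired with the generic wait_var_event()/wake_up_var() helpers used later in this series of hunks: the destroy ioctl bumps it when a queue is erased from the xarray, queue teardown drops it and wakes waiters, and the fdinfo query blocks until it reaches zero. A minimal sketch of that pattern, with hypothetical names rather than the driver code itself:

    #include <linux/atomic.h>
    #include <linux/wait_bit.h>     /* wait_var_event(), wake_up_var() */

    static atomic_t pending;        /* items unlinked but not yet torn down */

    /* called when an item is unlinked and queued for removal */
    static void removal_begin(void)
    {
            atomic_inc(&pending);
    }

    /* called when that item's teardown completes */
    static void removal_end(void)
    {
            if (atomic_dec_and_test(&pending))
                    wake_up_var(&pending);
    }

    /* reader side: wait until no removal is in flight before sampling state */
    static void wait_for_stable_view(void)
    {
            wait_var_event(&pending, !atomic_read(&pending));
    }
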
+ */ + atomic_t pending_removal; } exec_queue; /** @run_ticks: hw engine class run time in ticks for this drm client */ diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 22f0f1a6dfd5..298a587da7f1 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -269,6 +269,49 @@ static void show_meminfo(struct drm_printer *p, struct drm_file *file) } } +static struct xe_hw_engine *any_engine(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned long gt_id; + + for_each_gt(gt, xe, gt_id) { + struct xe_hw_engine *hwe = xe_gt_any_hw_engine(gt); + + if (hwe) + return hwe; + } + + return NULL; +} + +static bool force_wake_get_any_engine(struct xe_device *xe, + struct xe_hw_engine **phwe, + unsigned int *pfw_ref) +{ + enum xe_force_wake_domains domain; + unsigned int fw_ref; + struct xe_hw_engine *hwe; + struct xe_force_wake *fw; + + hwe = any_engine(xe); + if (!hwe) + return false; + + domain = xe_hw_engine_to_fw_domain(hwe); + fw = gt_to_fw(hwe->gt); + + fw_ref = xe_force_wake_get(fw, domain); + if (!xe_force_wake_ref_has_domain(fw_ref, domain)) { + xe_force_wake_put(fw, fw_ref); + return false; + } + + *phwe = hwe; + *pfw_ref = fw_ref; + + return true; +} + static void show_run_ticks(struct drm_printer *p, struct drm_file *file) { unsigned long class, i, gt_id, capacity[XE_ENGINE_CLASS_MAX] = { }; @@ -280,7 +323,18 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) u64 gpu_timestamp; unsigned int fw_ref; + /* + * Wait for any exec queue going away: their cycles will get updated on + * context switch out, so wait for that to happen + */ + wait_var_event(&xef->exec_queue.pending_removal, + !atomic_read(&xef->exec_queue.pending_removal)); + xe_pm_runtime_get(xe); + if (!force_wake_get_any_engine(xe, &hwe, &fw_ref)) { + xe_pm_runtime_put(xe); + return; + } /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); @@ -295,33 +349,11 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) } mutex_unlock(&xef->exec_queue.lock); - /* Get the total GPU cycles */ - for_each_gt(gt, xe, gt_id) { - enum xe_force_wake_domains fw; - - hwe = xe_gt_any_hw_engine(gt); - if (!hwe) - continue; - - fw = xe_hw_engine_to_fw_domain(hwe); - - fw_ref = xe_force_wake_get(gt_to_fw(gt), fw); - if (!xe_force_wake_ref_has_domain(fw_ref, fw)) { - hwe = NULL; - xe_force_wake_put(gt_to_fw(gt), fw_ref); - break; - } - - gpu_timestamp = xe_hw_engine_read_timestamp(hwe); - xe_force_wake_put(gt_to_fw(gt), fw_ref); - break; - } + gpu_timestamp = xe_hw_engine_read_timestamp(hwe); + xe_force_wake_put(gt_to_fw(hwe->gt), fw_ref); xe_pm_runtime_put(xe); - if (unlikely(!hwe)) - return; - for (class = 0; class < XE_ENGINE_CLASS_MAX; class++) { const char *class_name; diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h index d45b71426cc8..d61650d4aa0b 100644 --- a/drivers/gpu/drm/xe/xe_drv.h +++ b/drivers/gpu/drm/xe/xe_drv.h @@ -10,7 +10,6 @@ #define DRIVER_NAME "xe" #define DRIVER_DESC "Intel Xe Graphics" -#define DRIVER_DATE "20201103" /* Interface history: * diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index fd0f3b3c9101..aab9e561153d 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -240,6 +240,7 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe, return q; } +ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO); void xe_exec_queue_destroy(struct kref *ref) { @@ -262,8 
+263,11 @@ void xe_exec_queue_fini(struct xe_exec_queue *q) /* * Before releasing our ref to lrc and xef, accumulate our run ticks + * and wakeup any waiters. */ xe_exec_queue_update_run_ticks(q); + if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal)) + wake_up_var(&q->xef->exec_queue.pending_removal); for (i = 0; i < q->width; ++i) xe_lrc_put(q->lrc[i]); @@ -826,7 +830,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data, mutex_lock(&xef->exec_queue.lock); q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id); + if (q) + atomic_inc(&xef->exec_queue.pending_removal); mutex_unlock(&xef->exec_queue.lock); + if (XE_IOCTL_DBG(xe, !q)) return -ENOENT; diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index 558fac8bb6fb..05154f9de1a6 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -598,10 +598,10 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) u64 start; u64 offset, pte; - if (XE_WARN_ON(!bo->ggtt_node)) + if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id])) return; - start = bo->ggtt_node->base.start; + start = bo->ggtt_node[ggtt->tile->id]->base.start; for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) { pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index); @@ -612,15 +612,16 @@ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { - int err; u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE; + u8 tile_id = ggtt->tile->id; + int err; if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K) alignment = SZ_64K; - if (XE_WARN_ON(bo->ggtt_node)) { + if (XE_WARN_ON(bo->ggtt_node[tile_id])) { /* Someone's already inserted this BO in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); return 0; } @@ -630,19 +631,19 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile)); - bo->ggtt_node = xe_ggtt_node_init(ggtt); - if (IS_ERR(bo->ggtt_node)) { - err = PTR_ERR(bo->ggtt_node); - bo->ggtt_node = NULL; + bo->ggtt_node[tile_id] = xe_ggtt_node_init(ggtt); + if (IS_ERR(bo->ggtt_node[tile_id])) { + err = PTR_ERR(bo->ggtt_node[tile_id]); + bo->ggtt_node[tile_id] = NULL; goto out; } mutex_lock(&ggtt->lock); - err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node->base, bo->size, - alignment, 0, start, end, 0); + err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base, + bo->size, alignment, 0, start, end, 0); if (err) { - xe_ggtt_node_fini(bo->ggtt_node); - bo->ggtt_node = NULL; + xe_ggtt_node_fini(bo->ggtt_node[tile_id]); + bo->ggtt_node[tile_id] = NULL; } else { xe_ggtt_map_bo(ggtt, bo); } @@ -691,13 +692,15 @@ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { - if (XE_WARN_ON(!bo->ggtt_node)) + u8 tile_id = ggtt->tile->id; + + if (XE_WARN_ON(!bo->ggtt_node[tile_id])) return; /* This BO is not currently in the GGTT */ - xe_tile_assert(ggtt->tile, bo->ggtt_node->base.size == bo->size); + xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size); - xe_ggtt_node_remove(bo->ggtt_node, + xe_ggtt_node_remove(bo->ggtt_node[tile_id], bo->flags & XE_BO_FLAG_GGTT_INVALIDATE); } diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 
64b2ae6839db..c250ea773491 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -71,8 +71,14 @@ static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, static inline struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched) { - return list_first_entry_or_null(&sched->base.pending_list, - struct xe_sched_job, drm.list); + struct xe_sched_job *job; + + spin_lock(&sched->base.job_list_lock); + job = list_first_entry_or_null(&sched->base.pending_list, + struct xe_sched_job, drm.list); + spin_unlock(&sched->base.job_list_lock); + + return job; } static inline int diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c index fc64b45d324b..24cc6a4f9a96 100644 --- a/drivers/gpu/drm/xe/xe_gsc_proxy.c +++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c @@ -139,17 +139,29 @@ static int proxy_send_to_gsc(struct xe_gsc *gsc, u32 size) return 0; } -static int validate_proxy_header(struct xe_gsc_proxy_header *header, +static int validate_proxy_header(struct xe_gt *gt, + struct xe_gsc_proxy_header *header, u32 source, u32 dest, u32 max_size) { u32 type = FIELD_GET(GSC_PROXY_TYPE, header->hdr); u32 length = FIELD_GET(GSC_PROXY_PAYLOAD_LENGTH, header->hdr); + int ret = 0; - if (header->destination != dest || header->source != source) - return -ENOEXEC; + if (header->destination != dest || header->source != source) { + ret = -ENOEXEC; + goto out; + } - if (length + PROXY_HDR_SIZE > max_size) - return -E2BIG; + if (length + PROXY_HDR_SIZE > max_size) { + ret = -E2BIG; + goto out; + } + + /* We only care about the status if this is a message for the driver */ + if (dest == GSC_PROXY_ADDRESSING_KMD && header->status != 0) { + ret = -EIO; + goto out; + } switch (type) { case GSC_PROXY_MSG_TYPE_PROXY_PAYLOAD: @@ -157,12 +169,20 @@ static int validate_proxy_header(struct xe_gsc_proxy_header *header, break; fallthrough; case GSC_PROXY_MSG_TYPE_PROXY_INVALID: - return -EIO; + ret = -EIO; + break; default: break; } - return 0; +out: + if (ret) + xe_gt_err(gt, + "GSC proxy error: s=0x%x[0x%x], d=0x%x[0x%x], t=%u, l=0x%x, st=0x%x\n", + header->source, source, header->destination, dest, + type, length, header->status); + + return ret; } #define proxy_header_wr(xe_, map_, offset_, field_, val_) \ @@ -228,12 +248,17 @@ static int proxy_query(struct xe_gsc *gsc) xe_map_memcpy_from(xe, to_csme_hdr, &gsc->proxy.from_gsc, reply_offset, PROXY_HDR_SIZE); - /* stop if this was the last message */ - if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) + /* Check the status and stop if this was the last message */ + if (FIELD_GET(GSC_PROXY_TYPE, to_csme_hdr->hdr) == GSC_PROXY_MSG_TYPE_PROXY_END) { + ret = validate_proxy_header(gt, to_csme_hdr, + GSC_PROXY_ADDRESSING_GSC, + GSC_PROXY_ADDRESSING_KMD, + GSC_PROXY_BUFFER_SIZE - reply_offset); break; + } /* make sure the GSC-to-CSME proxy header is sane */ - ret = validate_proxy_header(to_csme_hdr, + ret = validate_proxy_header(gt, to_csme_hdr, GSC_PROXY_ADDRESSING_GSC, GSC_PROXY_ADDRESSING_CSME, GSC_PROXY_BUFFER_SIZE - reply_offset); @@ -262,7 +287,7 @@ static int proxy_query(struct xe_gsc *gsc) } /* make sure the CSME-to-GSC proxy header is sane */ - ret = validate_proxy_header(gsc->proxy.from_csme, + ret = validate_proxy_header(gt, gsc->proxy.from_csme, GSC_PROXY_ADDRESSING_CSME, GSC_PROXY_ADDRESSING_GSC, GSC_PROXY_BUFFER_SIZE - reply_offset); diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index d6744be01a68..41ab7fbebc19 100644 --- 
a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -748,10 +748,8 @@ static int do_gt_restart(struct xe_gt *gt) if (err) return err; - for_each_hw_engine(hwe, gt, id) { + for_each_hw_engine(hwe, gt, id) xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); - } /* Get CCS mode in sync between sw/hw */ xe_gt_apply_ccs_mode(gt); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 79c426dc2505..2606cd396df5 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -10,7 +10,6 @@ #include <drm/drm_exec.h> #include <drm/drm_managed.h> -#include <drm/ttm/ttm_execbuf_util.h> #include "abi/guc_actions_abi.h" #include "xe_bo.h" diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h index 5dc71394372d..11da0228cea7 100644 --- a/drivers/gpu/drm/xe/xe_gt_printk.h +++ b/drivers/gpu/drm/xe/xe_gt_printk.h @@ -60,6 +60,21 @@ static inline void __xe_gt_printfn_info(struct drm_printer *p, struct va_format xe_gt_info(gt, "%pV", vaf); } +static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *vaf) +{ + struct xe_gt *gt = p->arg; + struct drm_printer dbg; + + /* + * The original xe_gt_dbg() callsite annotations are useless here, + * redirect to the tweaked drm_dbg_printer() instead. + */ + dbg = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, NULL); + dbg.origin = p->origin; + + drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf); +} + /** * xe_gt_err_printer - Construct a &drm_printer that outputs to xe_gt_err() * @gt: the &xe_gt pointer to use in xe_gt_err() @@ -90,4 +105,20 @@ static inline struct drm_printer xe_gt_info_printer(struct xe_gt *gt) return p; } +/** + * xe_gt_dbg_printer - Construct a &drm_printer that outputs like xe_gt_dbg() + * @gt: the &xe_gt pointer to use in xe_gt_dbg() + * + * Return: The &drm_printer object. + */ +static inline struct drm_printer xe_gt_dbg_printer(struct xe_gt *gt) +{ + struct drm_printer p = { + .printfn = __xe_gt_printfn_dbg, + .arg = gt, + .origin = (const void *)_THIS_IP_, + }; + return p; +} + #endif diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 192643d63d22..65082f12f1a8 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -207,6 +207,11 @@ static int pf_push_vf_cfg_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_PREEMPT_TIMEOUT_KEY, *preempt_timeout); } +static int pf_push_vf_cfg_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + return pf_push_vf_cfg_u32(gt, vfid, GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY, priority); +} + static int pf_push_vf_cfg_lmem(struct xe_gt *gt, unsigned int vfid, u64 size) { return pf_push_vf_cfg_u64(gt, vfid, GUC_KLV_VF_CFG_LMEM_SIZE_KEY, size); @@ -1540,8 +1545,6 @@ static u64 pf_query_max_lmem(struct xe_gt *gt) #ifdef CONFIG_DRM_XE_DEBUG_SRIOV #define MAX_FAIR_LMEM SZ_128M /* XXX: make it small for the driver bringup */ -#else -#define MAX_FAIR_LMEM SZ_2G /* XXX: known issue with allocating BO over 2GiB */ #endif static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs) @@ -1767,6 +1770,77 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi return preempt_timeout; } +static const char *sched_priority_unit(u32 priority) +{ + return priority == GUC_SCHED_PRIORITY_LOW ? "(low)" : + priority == GUC_SCHED_PRIORITY_NORMAL ? 
"(normal)" : + priority == GUC_SCHED_PRIORITY_HIGH ? "(high)" : + "(?)"; +} + +static int pf_provision_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + int err; + + err = pf_push_vf_cfg_sched_priority(gt, vfid, priority); + if (unlikely(err)) + return err; + + config->sched_priority = priority; + return 0; +} + +static int pf_get_sched_priority(struct xe_gt *gt, unsigned int vfid) +{ + struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid); + + return config->sched_priority; +} + +/** + * xe_gt_sriov_pf_config_set_sched_priority() - Configure scheduling priority. + * @gt: the &xe_gt + * @vfid: the VF identifier + * @priority: requested scheduling priority + * + * This function can only be called on PF. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority) +{ + int err; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + err = pf_provision_sched_priority(gt, vfid, priority); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return pf_config_set_u32_done(gt, vfid, priority, + xe_gt_sriov_pf_config_get_sched_priority(gt, vfid), + "scheduling priority", sched_priority_unit, err); +} + +/** + * xe_gt_sriov_pf_config_get_sched_priority - Get VF's scheduling priority. + * @gt: the &xe_gt + * @vfid: the VF identifier + * + * This function can only be called on PF. + * + * Return: VF's (or PF's) scheduling priority. + */ +u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid) +{ + u32 priority; + + mutex_lock(xe_gt_sriov_pf_master_mutex(gt)); + priority = pf_get_sched_priority(gt, vfid); + mutex_unlock(xe_gt_sriov_pf_master_mutex(gt)); + + return priority; +} + static void pf_reset_config_sched(struct xe_gt *gt, struct xe_gt_sriov_config *config) { lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h index 0c55aa40a1a7..f894e9d4abba 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h @@ -44,6 +44,9 @@ u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfi int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid, u32 preempt_timeout); +u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid); +int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority); + u32 xe_gt_sriov_pf_config_get_threshold(struct xe_gt *gt, unsigned int vfid, enum xe_guc_klv_threshold_index index); int xe_gt_sriov_pf_config_set_threshold(struct xe_gt *gt, unsigned int vfid, diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h index 2d3b73d78f14..686c7b3b6d7a 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config_types.h @@ -33,6 +33,8 @@ struct xe_gt_sriov_config { u32 exec_quantum; /** @preempt_timeout: preemption timeout in microseconds. */ u32 preempt_timeout; + /** @sched_priority: scheduling priority. */ + u32 sched_priority; /** @thresholds: GuC thresholds for adverse events notifications. 
*/ u32 thresholds[XE_GUC_KLV_NUM_THRESHOLDS]; }; diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c index 05df4ab3514b..b2521dd6ec42 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c @@ -164,6 +164,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * │ │ ├── contexts_spare * │ │ ├── exec_quantum_ms * │ │ ├── preempt_timeout_us + * │ │ ├── sched_priority * │ ├── vf1 * │ │ ├── ggtt_quota * │ │ ├── lmem_quota @@ -171,6 +172,7 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent) * │ │ ├── contexts_quota * │ │ ├── exec_quantum_ms * │ │ ├── preempt_timeout_us + * │ │ ├── sched_priority */ #define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \ @@ -209,6 +211,7 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n"); DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n"); +DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n"); /* * /sys/kernel/debug/dri/0/ @@ -295,6 +298,8 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne &exec_quantum_fops); debugfs_create_file_unsafe("preempt_timeout_us", 0644, parent, parent, &preempt_timeout_fops); + debugfs_create_file_unsafe("sched_priority", 0644, parent, parent, + &sched_priority_fops); /* register all threshold attributes */ #define register_threshold_attribute(TAG, NAME, ...) \ diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h index 0bf12d89ceb2..6af219d93c3b 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_helpers.h @@ -18,7 +18,7 @@ * is within a range of supported VF numbers (up to maximum number of VFs that * driver can support, including VF0 that represents the PF itself). * - * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + * Note: Effective only on debug builds. See `Xe Asserts`_ for more information. */ #define xe_gt_sriov_pf_assert_vfid(gt, vfid) xe_sriov_pf_assert_vfid(gt_to_xe(gt), (vfid)) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c index fae5be5a2a11..c00fb354705f 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_policy.c @@ -135,14 +135,33 @@ static int pf_update_policy_u32(struct xe_gt *gt, u16 key, u32 *policy, u32 valu return 0; } +static void pf_bulk_reset_sched_priority(struct xe_gt *gt, u32 priority) +{ + unsigned int total_vfs = 1 + xe_gt_sriov_pf_get_totalvfs(gt); + unsigned int n; + + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); + lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); + + for (n = 0; n < total_vfs; n++) + gt->sriov.pf.vfs[n].config.sched_priority = priority; +} + static int pf_provision_sched_if_idle(struct xe_gt *gt, bool enable) { + int err; + xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt))); lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt)); - return pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, - >->sriov.pf.policy.guc.sched_if_idle, - enable); + err = pf_update_policy_bool(gt, GUC_KLV_VGT_POLICY_SCHED_IF_IDLE_KEY, + >->sriov.pf.policy.guc.sched_if_idle, + enable); + + if (!err) + pf_bulk_reset_sched_priority(gt, enable ? 
GUC_SCHED_PRIORITY_NORMAL : + GUC_SCHED_PRIORITY_LOW); + return err; } static int pf_reprovision_sched_if_idle(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index d3baba50f085..cca5d5732802 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -27,6 +27,7 @@ #include "xe_guc_relay.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_sriov_vf.h" #include "xe_uc_fw.h" #include "xe_wopcm.h" @@ -223,6 +224,44 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt) return 0; } +static int guc_action_vf_notify_resfix_done(struct xe_guc *guc) +{ + u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = { + FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) | + FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | + FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION, GUC_ACTION_VF2GUC_NOTIFY_RESFIX_DONE), + }; + int ret; + + ret = xe_guc_mmio_send(guc, request, ARRAY_SIZE(request)); + + return ret > 0 ? -EPROTO : ret; +} + +/** + * xe_gt_sriov_vf_notify_resfix_done - Notify GuC about resource fixups apply completed. + * @gt: the &xe_gt struct instance linked to target GuC + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. + */ +int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt) +{ + struct xe_guc *guc = >->uc.guc; + int err; + + xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); + + err = guc_action_vf_notify_resfix_done(guc); + if (unlikely(err)) + xe_gt_sriov_err(gt, "Failed to notify GuC about resource fixup done (%pe)\n", + ERR_PTR(err)); + else + xe_gt_sriov_dbg_verbose(gt, "sent GuC resource fixup done\n"); + + return err; +} + static int guc_action_query_single_klv(struct xe_guc *guc, u32 key, u32 *value, u32 value_len) { @@ -692,6 +731,30 @@ failed: return err; } +/** + * xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery, + * or just mark that a GuC is ready for it. + * @gt: the &xe_gt struct instance linked to target GuC + * + * This function shall be called only by VF. + */ +void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt) +{ + struct xe_device *xe = gt_to_xe(gt); + + xe_gt_assert(gt, IS_SRIOV_VF(xe)); + + set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags); + /* + * We need to be certain that if all flags were set, at least one + * thread will notice that and schedule the recovery. 
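
The smp_mb__after_atomic() that follows this comment implements a common publish-then-check idiom: each GT sets its own ready bit, and the full barrier after the atomic guarantees that whichever thread sets the last bit also observes every earlier bit, so at least one thread goes on to schedule the recovery. A rough sketch of the idiom with hypothetical names (the real check happens in xe_sriov_vf_start_migration_recovery(), which is outside this hunk):

    #include <linux/atomic.h>
    #include <linux/bitops.h>
    #include <linux/workqueue.h>

    static unsigned long ready_flags;       /* one bit per GT */
    static unsigned long expected_mask;     /* bits for every GT that must report */
    static struct work_struct recovery_work;

    static void gt_report_ready(unsigned int gt_id)
    {
            set_bit(gt_id, &ready_flags);

            /* order the bit set before the readback below on all CPUs */
            smp_mb__after_atomic();

            if ((READ_ONCE(ready_flags) & expected_mask) == expected_mask)
                    schedule_work(&recovery_work);
    }
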
+ */ + smp_mb__after_atomic(); + + xe_gt_sriov_info(gt, "ready for recovery after migration\n"); + xe_sriov_vf_start_migration_recovery(xe); +} + static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor) { xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h index e541ce57bec2..912d20814261 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h @@ -17,6 +17,8 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt); int xe_gt_sriov_vf_connect(struct xe_gt *gt); int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt); int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt); +int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt); +void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt); u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt); u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt); diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h index 91d944f6c4e4..38325ef53617 100644 --- a/drivers/gpu/drm/xe/xe_gt_stats.h +++ b/drivers/gpu/drm/xe/xe_gt_stats.h @@ -6,15 +6,11 @@ #ifndef _XE_GT_STATS_H_ #define _XE_GT_STATS_H_ +#include "xe_gt_stats_types.h" + struct xe_gt; struct drm_printer; -enum xe_gt_stats_id { - XE_GT_STATS_ID_TLB_INVAL, - /* must be the last entry */ - __XE_GT_STATS_NUM_IDS, -}; - #ifdef CONFIG_DEBUG_FS int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p); void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr); diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h new file mode 100644 index 000000000000..2fc055e39f27 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef _XE_GT_STATS_TYPES_H_ +#define _XE_GT_STATS_TYPES_H_ + +enum xe_gt_stats_id { + XE_GT_STATS_ID_TLB_INVAL, + /* must be the last entry */ + __XE_GT_STATS_NUM_IDS, +}; + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c index 03b225364101..8db78d616b6f 100644 --- a/drivers/gpu/drm/xe/xe_gt_throttle.c +++ b/drivers/gpu/drm/xe/xe_gt_throttle.c @@ -8,6 +8,7 @@ #include <regs/xe_gt_regs.h> #include "xe_device.h" #include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_gt_sysfs.h" #include "xe_gt_throttle.h" #include "xe_mmio.h" @@ -53,6 +54,7 @@ static u32 read_status(struct xe_gt *gt) { u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK; + xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status); return status; } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index 3cb228c773cd..665927b80e9e 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -65,6 +65,14 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe __invalidation_fence_signal(xe, fence); } +void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence) +{ + if (WARN_ON_ONCE(!fence->gt)) + return; + + __invalidation_fence_signal(gt_to_xe(fence->gt), fence); +} + static void xe_gt_tlb_fence_timeout(struct work_struct *work) { struct xe_gt *gt = container_of(work, struct xe_gt, @@ -253,9 +261,18 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, 0, /* seqno, replaced in send_tlb_invalidation */ MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; + int ret; + + ret = send_tlb_invalidation(>->uc.guc, fence, 
action, + ARRAY_SIZE(action)); + /* + * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches + * should be nuked on a GT reset so this error can be ignored. + */ + if (ret == -ECANCELED) + return 0; - return send_tlb_invalidation(>->uc.guc, fence, action, - ARRAY_SIZE(action)); + return ret; } /** diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index f430d5797af7..00b1c6c01e8d 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -28,6 +28,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, bool stack); +void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence); static inline void xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index a287b98ee70b..6e66bf0e8b3f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -11,10 +11,10 @@ #include "xe_gt_idle_types.h" #include "xe_gt_sriov_pf_types.h" #include "xe_gt_sriov_vf_types.h" -#include "xe_gt_stats.h" +#include "xe_gt_stats_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" -#include "xe_oa.h" +#include "xe_oa_types.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" #include "xe_uc_types.h" diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c index 7f704346a8f4..4e2868efb620 100644 --- a/drivers/gpu/drm/xe/xe_guc.c +++ b/drivers/gpu/drm/xe/xe_guc.c @@ -44,7 +44,15 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc, struct xe_bo *bo) { struct xe_device *xe = guc_to_xe(guc); - u32 addr = xe_bo_ggtt_addr(bo); + u32 addr; + + /* + * For most BOs, the address on the allocating tile is fine. However for + * some, e.g. G2G CTB, the address on a specific tile is required as it + * might be different for each tile. So, just always ask for the address + * on the target GuC. 
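
The per-GuC address lookup that follows ties in with the per-tile ggtt_node[] indexing in the xe_ggtt.c hunks earlier in this section: a BO bound into more than one GGTT owns one node per tile, so its GGTT offset may differ between tiles. A hypothetical accessor to illustrate the idea (not a driver function; xe_bo's real ggtt_node array lives in xe_bo_types.h, which is not part of this excerpt):

    static u64 bo_ggtt_offset_on_tile(struct xe_bo *bo, u8 tile_id)
    {
            /* same expression the GGTT map/remove paths use above */
            return bo->ggtt_node[tile_id]->base.start;
    }
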
+ */ + addr = __xe_bo_ggtt_addr(bo, gt_to_tile(guc_to_gt(guc))->id); /* GuC addresses above GUC_GGTT_TOP don't map through the GTT */ xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc))); @@ -244,6 +252,293 @@ static void guc_write_params(struct xe_guc *guc) xe_mmio_write32(>->mmio, SOFT_SCRATCH(1 + i), guc->params[i]); } +static int guc_action_register_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev, + u32 desc_addr, u32 buff_addr, u32 size) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + u32 action[] = { + XE_GUC_ACTION_REGISTER_G2G, + FIELD_PREP(XE_G2G_REGISTER_SIZE, size / SZ_4K - 1) | + FIELD_PREP(XE_G2G_REGISTER_TYPE, type) | + FIELD_PREP(XE_G2G_REGISTER_TILE, dst_tile) | + FIELD_PREP(XE_G2G_REGISTER_DEVICE, dst_dev), + desc_addr, + buff_addr, + }; + + xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT)); + xe_assert(xe, !(size % SZ_4K)); + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +static int guc_action_deregister_g2g_buffer(struct xe_guc *guc, u32 type, u32 dst_tile, u32 dst_dev) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + u32 action[] = { + XE_GUC_ACTION_DEREGISTER_G2G, + FIELD_PREP(XE_G2G_DEREGISTER_TYPE, type) | + FIELD_PREP(XE_G2G_DEREGISTER_TILE, dst_tile) | + FIELD_PREP(XE_G2G_DEREGISTER_DEVICE, dst_dev), + }; + + xe_assert(xe, (type == XE_G2G_TYPE_IN) || (type == XE_G2G_TYPE_OUT)); + + return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action)); +} + +#define G2G_DEV(gt) (((gt)->info.type == XE_GT_TYPE_MAIN) ? 0 : 1) + +#define G2G_BUFFER_SIZE (SZ_4K) +#define G2G_DESC_SIZE (64) +#define G2G_DESC_AREA_SIZE (SZ_4K) + +/* + * Generate a unique id for each bi-directional CTB for each pair of + * near and far tiles/devices. The id can then be used as an index into + * a single allocation that is sub-divided into multiple CTBs. + * + * For example, with two devices per tile and two tiles, the table should + * look like: + * Far <tile>.<dev> + * 0.0 0.1 1.0 1.1 + * N 0.0 --/-- 00/01 02/03 04/05 + * e 0.1 01/00 --/-- 06/07 08/09 + * a 1.0 03/02 07/06 --/-- 10/11 + * r 1.1 05/04 09/08 11/10 --/-- + * + * Where each entry is Rx/Tx channel id. + * + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would + * be reading from channel #11 and writing to channel #10. Whereas, + * GuC #2 talking to GuC #3 would be read on #10 and write to #11. 
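
The table above can be re-derived outside the kernel as a sanity check. The following standalone userspace program reimplements the same index arithmetic as the g2g_slot() helper defined right after this comment (it is only a cross-check, not the driver code) and prints the channel pair for every near/far combination in the two-tile, two-device case; its output reproduces the 00/01 through 10/11 pairs shown in the table.

    #include <stdio.h>

    /* 4 GuCs, ids 0..3 encode <tile>.<dev> as (tile << 1) | dev */
    static int slot(int near, int far, int type, int max_inst)
    {
            int x, y, dir, idx = 0, i;

            if (near == far)
                    return -1;

            if (far > near) {
                    x = far; y = near; dir = type;          /* top-right half */
            } else {
                    x = near; y = far; dir = 1 - type;      /* bottom-left half */
            }

            for (i = y; i > 0; i--)                         /* rows before the target */
                    idx += max_inst - i;
            idx += x - 1 - y;                               /* columns up to the target */

            return idx * 2 + dir;                           /* slots come in Rx/Tx pairs */
    }

    int main(void)
    {
            int near, far;

            for (near = 0; near < 4; near++)
                    for (far = 0; far < 4; far++)
                            if (near != far)
                                    printf("%d.%d <-> %d.%d: channels %2d / %2d\n",
                                           near >> 1, near & 1, far >> 1, far & 1,
                                           slot(near, far, 0, 4), slot(near, far, 1, 4));
            return 0;
    }
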
+ */ +static unsigned int g2g_slot(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev, + u32 type, u32 max_inst, bool have_dev) +{ + u32 near = near_tile, far = far_tile; + u32 idx = 0, x, y, direction; + int i; + + if (have_dev) { + near = (near << 1) | near_dev; + far = (far << 1) | far_dev; + } + + /* No need to send to one's self */ + if (far == near) + return -1; + + if (far > near) { + /* Top right table half */ + x = far; + y = near; + + /* T/R is 'forwards' direction */ + direction = type; + } else { + /* Bottom left table half */ + x = near; + y = far; + + /* B/L is 'backwards' direction */ + direction = (1 - type); + } + + /* Count the rows prior to the target */ + for (i = y; i > 0; i--) + idx += max_inst - i; + + /* Count this row up to the target */ + idx += (x - 1 - y); + + /* Slots are in Rx/Tx pairs */ + idx *= 2; + + /* Pick Rx/Tx direction */ + idx += direction; + + return idx; +} + +static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 type, bool have_dev) +{ + struct xe_gt *near_gt = guc_to_gt(near_guc); + struct xe_device *xe = gt_to_xe(near_gt); + struct xe_bo *g2g_bo; + u32 near_tile = gt_to_tile(near_gt)->id; + u32 near_dev = G2G_DEV(near_gt); + u32 far_tile = gt_to_tile(far_gt)->id; + u32 far_dev = G2G_DEV(far_gt); + u32 max = xe->info.gt_count; + u32 base, desc, buf; + int slot; + + /* G2G is not allowed between different cards */ + xe_assert(xe, xe == gt_to_xe(far_gt)); + + g2g_bo = near_guc->g2g.bo; + xe_assert(xe, g2g_bo); + + slot = g2g_slot(near_tile, near_dev, far_tile, far_dev, type, max, have_dev); + xe_assert(xe, slot >= 0); + + base = guc_bo_ggtt_addr(near_guc, g2g_bo); + desc = base + slot * G2G_DESC_SIZE; + buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE; + + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE); + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size); + + return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev, + desc, buf, G2G_BUFFER_SIZE); +} + +static void guc_g2g_deregister(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type) +{ + guc_action_deregister_g2g_buffer(guc, type, far_tile, far_dev); +} + +static u32 guc_g2g_size(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + unsigned int count = xe->info.gt_count; + u32 num_channels = (count * (count - 1)) / 2; + + xe_assert(xe, num_channels * XE_G2G_TYPE_LIMIT * G2G_DESC_SIZE <= G2G_DESC_AREA_SIZE); + + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE; +} + +static bool xe_guc_g2g_wanted(struct xe_device *xe) +{ + /* Can't do GuC to GuC communication if there is only one GuC */ + if (xe->info.gt_count <= 1) + return false; + + /* No current user */ + return false; +} + +static int guc_g2g_alloc(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + struct xe_tile *tile = gt_to_tile(gt); + struct xe_bo *bo; + u32 g2g_size; + + if (guc->g2g.bo) + return 0; + + if (gt->info.id != 0) { + struct xe_gt *root_gt = xe_device_get_gt(xe, 0); + struct xe_guc *root_guc = &root_gt->uc.guc; + struct xe_bo *bo; + + bo = xe_bo_get(root_guc->g2g.bo); + if (!bo) + return -ENODEV; + + guc->g2g.bo = bo; + guc->g2g.owned = false; + return 0; + } + + g2g_size = guc_g2g_size(guc); + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size, + XE_BO_FLAG_VRAM_IF_DGFX(tile) | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_ALL | + XE_BO_FLAG_GGTT_INVALIDATE); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + xe_map_memset(xe, 
&bo->vmap, 0, 0, g2g_size); + guc->g2g.bo = bo; + guc->g2g.owned = true; + + return 0; +} + +static void guc_g2g_fini(struct xe_guc *guc) +{ + if (!guc->g2g.bo) + return; + + /* Unpinning the owned object is handled by generic shutdown */ + if (!guc->g2g.owned) + xe_bo_put(guc->g2g.bo); + + guc->g2g.bo = NULL; +} + +static int guc_g2g_start(struct xe_guc *guc) +{ + struct xe_gt *far_gt, *gt = guc_to_gt(guc); + struct xe_device *xe = gt_to_xe(gt); + unsigned int i, j; + int t, err; + bool have_dev; + + if (!guc->g2g.bo) { + int ret; + + ret = guc_g2g_alloc(guc); + if (ret) + return ret; + } + + /* GuC interface will need extending if more GT device types are ever created. */ + xe_gt_assert(gt, (gt->info.type == XE_GT_TYPE_MAIN) || (gt->info.type == XE_GT_TYPE_MEDIA)); + + /* Channel numbering depends on whether there are multiple GTs per tile */ + have_dev = xe->info.gt_count > xe->info.tile_count; + + for_each_gt(far_gt, xe, i) { + u32 far_tile, far_dev; + + if (far_gt->info.id == gt->info.id) + continue; + + far_tile = gt_to_tile(far_gt)->id; + far_dev = G2G_DEV(far_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) { + err = guc_g2g_register(guc, far_gt, t, have_dev); + if (err) { + while (--t >= 0) + guc_g2g_deregister(guc, far_tile, far_dev, t); + goto err_deregister; + } + } + } + + return 0; + +err_deregister: + for_each_gt(far_gt, xe, j) { + u32 tile, dev; + + if (far_gt->info.id == gt->info.id) + continue; + + if (j >= i) + break; + + tile = gt_to_tile(far_gt)->id; + dev = G2G_DEV(far_gt); + + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) + guc_g2g_deregister(guc, tile, dev, t); + } + + return err; +} + static void guc_fini_hw(void *arg) { struct xe_guc *guc = arg; @@ -253,6 +548,8 @@ static void guc_fini_hw(void *arg) fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_uc_fini_hw(&guc_to_gt(guc)->uc); xe_force_wake_put(gt_to_fw(gt), fw_ref); + + guc_g2g_fini(guc); } /** @@ -423,7 +720,16 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc) int xe_guc_post_load_init(struct xe_guc *guc) { + int ret; + xe_guc_ads_populate_post_load(&guc->ads); + + if (xe_guc_g2g_wanted(guc_to_xe(guc))) { + ret = guc_g2g_start(guc); + if (ret) + return ret; + } + guc->submission_state.enabled = true; return 0; @@ -945,7 +1251,6 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request, BUILD_BUG_ON(VF_SW_FLAG_COUNT != MED_VF_SW_FLAG_COUNT); - xe_assert(xe, !xe_guc_ct_enabled(&guc->ct)); xe_assert(xe, len); xe_assert(xe, len <= VF_SW_FLAG_COUNT); xe_assert(xe, len <= MED_VF_SW_FLAG_COUNT); @@ -1099,10 +1404,21 @@ int xe_guc_self_cfg64(struct xe_guc *guc, u16 key, u64 val) return guc_self_cfg(guc, key, 2, val); } +static void xe_guc_sw_0_irq_handler(struct xe_guc *guc) +{ + struct xe_gt *gt = guc_to_gt(guc); + + if (IS_SRIOV_VF(gt_to_xe(gt))) + xe_gt_sriov_vf_migrated_event_handler(gt); +} + void xe_guc_irq_handler(struct xe_guc *guc, const u16 iir) { if (iir & GUC_INTR_GUC2HOST) xe_guc_ct_irq_handler(&guc->ct); + + if (iir & GUC_INTR_SW_INT_0) + xe_guc_sw_0_irq_handler(guc); } void xe_guc_sanitize(struct xe_guc *guc) diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c index 4e746ae98888..943146e5b460 100644 --- a/drivers/gpu/drm/xe/xe_guc_ads.c +++ b/drivers/gpu/drm/xe/xe_guc_ads.c @@ -231,11 +231,6 @@ static size_t guc_ads_size(struct xe_guc_ads *ads) guc_ads_private_data_size(ads); } -static bool needs_wa_1607983814(struct xe_device *xe) -{ - return GRAPHICS_VERx100(xe) < 1250; -} - static size_t calculate_regset_size(struct xe_gt *gt) { struct xe_reg_sr_entry 
*sr_entry; @@ -250,7 +245,7 @@ static size_t calculate_regset_size(struct xe_gt *gt) count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES; - if (needs_wa_1607983814(gt_to_xe(gt))) + if (XE_WA(gt, 1607983814)) count += LNCFCMOCS_REG_COUNT; return count * sizeof(struct guc_mmio_reg); @@ -709,7 +704,6 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, struct iosys_map *regset_map, struct xe_hw_engine *hwe) { - struct xe_device *xe = ads_to_xe(ads); struct xe_hw_engine *hwe_rcs_reset_domain = xe_gt_any_hw_engine_by_reset_domain(hwe->gt, XE_ENGINE_CLASS_RENDER); struct xe_reg_sr_entry *entry; @@ -740,8 +734,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads, guc_mmio_regset_write_one(ads, regset_map, e->reg, count++); } - /* Wa_1607983814 */ - if (needs_wa_1607983814(xe) && hwe->class == XE_ENGINE_CLASS_RENDER) { + if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) { for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) { guc_mmio_regset_write_one(ads, regset_map, XELP_LNCFCMOCS(i), count++); diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c index d63912d28246..137571fae4ed 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.c +++ b/drivers/gpu/drm/xe/xe_guc_capture.c @@ -1806,7 +1806,6 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm if (!devcore_snapshot->matched_node) return; - xe_gt_assert(gt, snapshot->source <= XE_ENGINE_CAPTURE_SOURCE_GUC); xe_gt_assert(gt, snapshot->hwe); capture_class = xe_engine_class_to_guc_capture_class(snapshot->hwe->class); @@ -1815,7 +1814,8 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm snapshot->name ? snapshot->name : "", snapshot->logical_instance); drm_printf(p, "\tCapture_source: %s\n", - snapshot->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? "GuC" : "Manual"); + devcore_snapshot->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC ? + "GuC" : "Manual"); drm_printf(p, "\tCoverage: %s\n", grptype[devcore_snapshot->matched_node->is_partial]); drm_printf(p, "\tForcewake: domain 0x%x, ref %d\n", snapshot->forcewake.domain, snapshot->forcewake.ref); @@ -1840,29 +1840,24 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm } /** - * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the job. - * @job: The job object. + * xe_guc_capture_get_matching_and_lock - Matching GuC capture for the queue. + * @q: The exec queue object * - * Search within the capture outlist for the job, could be used for check if - * GuC capture is ready for the job. + * Search within the capture outlist for the queue, could be used for check if + * GuC capture is ready for the queue. * If found, the locked boolean of the node will be flagged. 
* * Returns: found guc-capture node ptr else NULL */ struct __guc_capture_parsed_output * -xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) +xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q) { struct xe_hw_engine *hwe; enum xe_hw_engine_id id; - struct xe_exec_queue *q; struct xe_device *xe; u16 guc_class = GUC_LAST_ENGINE_CLASS + 1; struct xe_devcoredump_snapshot *ss; - if (!job) - return NULL; - - q = job->q; if (!q || !q->gt) return NULL; @@ -1874,7 +1869,7 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) if (ss->matched_node && ss->matched_node->source == XE_ENGINE_CAPTURE_SOURCE_GUC) return ss->matched_node; - /* Find hwe for the job */ + /* Find hwe for the queue */ for_each_hw_engine(hwe, q->gt, id) { if (hwe != q->hwe) continue; @@ -1906,17 +1901,16 @@ xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job) } /** - * xe_engine_snapshot_capture_for_job - Take snapshot of associated engine - * @job: The job object + * xe_engine_snapshot_capture_for_queue - Take snapshot of associated engine + * @q: The exec queue object * * Take snapshot of associated HW Engine * * Returns: None. */ void -xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) +xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q) { - struct xe_exec_queue *q = job->q; struct xe_device *xe = gt_to_xe(q->gt); struct xe_devcoredump *coredump = &xe->devcoredump; struct xe_hw_engine *hwe; @@ -1934,11 +1928,12 @@ xe_engine_snapshot_capture_for_job(struct xe_sched_job *job) } if (!coredump->snapshot.hwe[id]) { - coredump->snapshot.hwe[id] = xe_hw_engine_snapshot_capture(hwe, job); + coredump->snapshot.hwe[id] = + xe_hw_engine_snapshot_capture(hwe, q); } else { struct __guc_capture_parsed_output *new; - new = xe_guc_capture_get_matching_and_lock(job); + new = xe_guc_capture_get_matching_and_lock(q); if (new) { struct xe_guc *guc = &q->gt->uc.guc; diff --git a/drivers/gpu/drm/xe/xe_guc_capture.h b/drivers/gpu/drm/xe/xe_guc_capture.h index 97a795d13dd1..20a078dc4b85 100644 --- a/drivers/gpu/drm/xe/xe_guc_capture.h +++ b/drivers/gpu/drm/xe/xe_guc_capture.h @@ -11,10 +11,10 @@ #include "xe_guc.h" #include "xe_guc_fwif.h" +struct xe_exec_queue; struct xe_guc; struct xe_hw_engine; struct xe_hw_engine_snapshot; -struct xe_sched_job; static inline enum guc_capture_list_class_type xe_guc_class_to_capture_class(u16 class) { @@ -50,10 +50,10 @@ size_t xe_guc_capture_ads_input_worst_size(struct xe_guc *guc); const struct __guc_mmio_reg_descr_group * xe_guc_capture_get_reg_desc_list(struct xe_gt *gt, u32 owner, u32 type, enum guc_capture_list_class_type capture_class, bool is_ext); -struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_sched_job *job); +struct __guc_capture_parsed_output *xe_guc_capture_get_matching_and_lock(struct xe_exec_queue *q); void xe_engine_manual_capture(struct xe_hw_engine *hwe, struct xe_hw_engine_snapshot *snapshot); void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm_printer *p); -void xe_engine_snapshot_capture_for_job(struct xe_sched_job *job); +void xe_engine_snapshot_capture_for_queue(struct xe_exec_queue *q); void xe_guc_capture_steered_list_init(struct xe_guc *guc); void xe_guc_capture_put_matched_nodes(struct xe_guc *guc); int xe_guc_capture_init(struct xe_guc *guc); diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 8aeb1789805c..7d33f3a11e61 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -54,6 +54,7 @@ enum { 
CT_DEAD_PARSE_G2H_UNKNOWN, /* 0x1000 */ CT_DEAD_PARSE_G2H_ORIGIN, /* 0x2000 */ CT_DEAD_PARSE_G2H_TYPE, /* 0x4000 */ + CT_DEAD_CRASH, /* 0x8000 */ }; static void ct_dead_worker_func(struct work_struct *w); @@ -469,8 +470,10 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct) * after any existing dead state has been dumped. */ spin_lock_irq(&ct->dead.lock); - if (ct->dead.reason) + if (ct->dead.reason) { ct->dead.reason |= (1 << CT_DEAD_STATE_REARM); + queue_work(system_unbound_wq, &ct->dead.worker); + } spin_unlock_irq(&ct->dead.lock); #endif @@ -1017,7 +1020,6 @@ retry_same_fence: } ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); - if (!ret) { LNL_FLUSH_WORK(&ct->g2h_worker); if (g2h_fence.done) { @@ -1121,6 +1123,24 @@ static int parse_g2h_event(struct xe_guc_ct *ct, u32 *msg, u32 len) return 0; } +static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action) +{ + struct xe_gt *gt = ct_to_gt(ct); + + if (action == XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED) + xe_gt_err(gt, "GuC Crash dump notification\n"); + else if (action == XE_GUC_ACTION_NOTIFY_EXCEPTION) + xe_gt_err(gt, "GuC Exception notification\n"); + else + xe_gt_err(gt, "Unknown GuC crash notification: 0x%04X\n", action); + + CT_DEAD(ct, NULL, CRASH); + + kick_reset(ct); + + return 0; +} + static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len) { struct xe_gt *gt = ct_to_gt(ct); @@ -1295,13 +1315,17 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len) case GUC_ACTION_GUC2PF_ADVERSE_EVENT: ret = xe_gt_sriov_pf_monitor_process_guc2pf(gt, hxg, hxg_len); break; + case XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED: + case XE_GUC_ACTION_NOTIFY_EXCEPTION: + ret = guc_crash_process_msg(ct, action); + break; default: xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action); } if (ret) { - xe_gt_err(gt, "G2H action 0x%04x failed (%pe)\n", - action, ERR_PTR(ret)); + xe_gt_err(gt, "G2H action %#04x failed (%pe) len %u msg %*ph\n", + action, ERR_PTR(ret), hxg_len, (int)sizeof(u32) * hxg_len, hxg); CT_DEAD(ct, NULL, PROCESS_FAILED); } diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h index 08ffe59f22fa..057153f89b30 100644 --- a/drivers/gpu/drm/xe/xe_guc_fwif.h +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h @@ -17,6 +17,7 @@ #define G2H_LEN_DW_TLB_INVALIDATE 3 #define GUC_ID_MAX 65535 +#define GUC_ID_UNKNOWN 0xffffffff #define GUC_CONTEXT_DISABLE 0 #define GUC_CONTEXT_ENABLE 1 diff --git a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c index 9d99fe266d97..146a6eda9e06 100644 --- a/drivers/gpu/drm/xe/xe_guc_klv_helpers.c +++ b/drivers/gpu/drm/xe/xe_guc_klv_helpers.c @@ -49,6 +49,8 @@ const char *xe_guc_klv_key_to_string(u16 key) return "begin_db_id"; case GUC_KLV_VF_CFG_BEGIN_CONTEXT_ID_KEY: return "begin_ctx_id"; + case GUC_KLV_VF_CFG_SCHED_PRIORITY_KEY: + return "sched_priority"; /* VF CFG threshold keys */ #define define_threshold_key_to_string_case(TAG, NAME, ...) 
\ diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 6f4a9812b4f4..9c36329fe857 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -412,12 +412,11 @@ static const int xe_exec_queue_prio_to_guc[] = { static void init_policies(struct xe_guc *guc, struct xe_exec_queue *q) { struct exec_queue_policy policy; - struct xe_device *xe = guc_to_xe(guc); enum xe_exec_queue_priority prio = q->sched_props.priority; u32 timeslice_us = q->sched_props.timeslice_us; u32 preempt_timeout_us = q->sched_props.preempt_timeout_us; - xe_assert(xe, exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); __guc_exec_queue_policy_start_klv(&policy, q->guc->id); __guc_exec_queue_policy_add_priority(&policy, xe_exec_queue_prio_to_guc[prio]); @@ -451,12 +450,11 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, struct guc_ctxt_registration_info *info) { #define MAX_MLRC_REG_SIZE (13 + XE_HW_ENGINE_MAX_INSTANCE * 2) - struct xe_device *xe = guc_to_xe(guc); u32 action[MAX_MLRC_REG_SIZE]; int len = 0; int i; - xe_assert(xe, xe_exec_queue_is_parallel(q)); + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_parallel(q)); action[len++] = XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC; action[len++] = info->flags; @@ -479,7 +477,7 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc, action[len++] = upper_32_bits(xe_lrc_descriptor(lrc)); } - xe_assert(xe, len <= MAX_MLRC_REG_SIZE); + xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE); #undef MAX_MLRC_REG_SIZE xe_guc_ct_send(&guc->ct, action, len, 0, 0); @@ -513,7 +511,7 @@ static void register_exec_queue(struct xe_exec_queue *q) struct xe_lrc *lrc = q->lrc[0]; struct guc_ctxt_registration_info info; - xe_assert(xe, !exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q)); memset(&info, 0, sizeof(info)); info.context_idx = q->guc->id; @@ -603,7 +601,7 @@ static int wq_noop_append(struct xe_exec_queue *q) if (wq_wait_for_space(q, wq_space_until_wrap(q))) return -ENODEV; - xe_assert(xe, FIELD_FIT(WQ_LEN_MASK, len_dw)); + xe_gt_assert(guc_to_gt(guc), FIELD_FIT(WQ_LEN_MASK, len_dw)); parallel_write(xe, map, wq[q->guc->wqi_tail / sizeof(u32)], FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) | @@ -643,13 +641,13 @@ static void wq_item_append(struct xe_exec_queue *q) wqi[i++] = lrc->ring.tail / sizeof(u64); } - xe_assert(xe, i == wqi_size / sizeof(u32)); + xe_gt_assert(guc_to_gt(guc), i == wqi_size / sizeof(u32)); iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[q->guc->wqi_tail / sizeof(u32)])); xe_map_memcpy_to(xe, &map, 0, wqi, wqi_size); q->guc->wqi_tail += wqi_size; - xe_assert(xe, q->guc->wqi_tail <= WQ_SIZE); + xe_gt_assert(guc_to_gt(guc), q->guc->wqi_tail <= WQ_SIZE); xe_device_wmb(xe); @@ -661,7 +659,6 @@ static void wq_item_append(struct xe_exec_queue *q) static void submit_exec_queue(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_lrc *lrc = q->lrc[0]; u32 action[3]; u32 g2h_len = 0; @@ -669,7 +666,7 @@ static void submit_exec_queue(struct xe_exec_queue *q) int len = 0; bool extra_submit = false; - xe_assert(xe, exec_queue_registered(q)); + xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q)); if (xe_exec_queue_is_parallel(q)) wq_item_append(q); @@ -716,12 +713,11 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job) struct xe_sched_job *job = to_xe_sched_job(drm_job); struct xe_exec_queue *q = job->q; struct xe_guc *guc = 
exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct dma_fence *fence = NULL; bool lr = xe_exec_queue_is_lr(q); - xe_assert(xe, !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || - exec_queue_banned(q) || exec_queue_suspended(q)); + xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) || + exec_queue_banned(q) || exec_queue_suspended(q)); trace_xe_sched_job_run(job); @@ -823,7 +819,7 @@ static void xe_guc_exec_queue_trigger_cleanup(struct xe_exec_queue *q) */ void xe_guc_submit_wedge(struct xe_guc *guc) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; unsigned long index; int err; @@ -833,7 +829,8 @@ void xe_guc_submit_wedge(struct xe_guc *guc) err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { - drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); + xe_gt_err(gt, "Failed to register clean-up on wedged.mode=2; " + "Although device is wedged.\n"); return; } @@ -865,11 +862,10 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) container_of(w, struct xe_guc_exec_queue, lr_tdr); struct xe_exec_queue *q = ge->q; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_gpu_scheduler *sched = &ge->sched; bool wedged; - xe_assert(xe, xe_exec_queue_is_lr(q)); + xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q)); trace_xe_exec_queue_lr_cleanup(q); wedged = guc_submit_hint_wedged(exec_queue_to_guc(q)); @@ -903,13 +899,19 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w) !exec_queue_pending_disable(q) || xe_guc_read_stopped(guc), HZ * 5); if (!ret) { - drm_warn(&xe->drm, "Schedule disable failed to respond"); + xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n", + q->guc->id); + xe_devcoredump(q, NULL, "Schedule disable failed to respond, guc_id=%d\n", + q->guc->id); xe_sched_submission_start(sched); xe_gt_reset_async(q->gt); return; } } + if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0])) + xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id); + xe_sched_submission_start(sched); } @@ -1068,13 +1070,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) * do manual capture first and decide later if we need to use it */ if (!exec_queue_killed(q) && !xe->devcoredump.captured && - !xe_guc_capture_get_matching_and_lock(job)) { + !xe_guc_capture_get_matching_and_lock(q)) { /* take force wake before engine register manual capture */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) xe_gt_info(q->gt, "failed to get forcewake for coredump capture\n"); - xe_engine_snapshot_capture_for_job(job); + xe_engine_snapshot_capture_for_queue(q); xe_force_wake_put(gt_to_fw(q->gt), fw_ref); } @@ -1132,7 +1134,12 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) if (!ret || xe_guc_read_stopped(guc)) { trigger_reset: if (!ret) - xe_gt_warn(guc_to_gt(guc), "Schedule disable failed to respond"); + xe_gt_warn(guc_to_gt(guc), + "Schedule disable failed to respond, guc_id=%d", + q->guc->id); + xe_devcoredump(q, job, + "Schedule disable failed to respond, guc_id=%d, ret=%d, guc_read=%d", + q->guc->id, ret, xe_guc_read_stopped(guc)); set_exec_queue_extra_ref(q); xe_exec_queue_get(q); /* GT reset owns this */ set_exec_queue_banned(q); @@ -1162,7 +1169,10 @@ trigger_reset: trace_xe_sched_job_timedout(job); 
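Most of the churn in these xe_guc_submit.c hunks is a mechanical swap of device-scoped asserts for GT-scoped ones, so a failing check is attributed to the GT that owns the GuC rather than only the device. A condensed sketch of the pattern (the wrapper function below is illustrative and not part of the patch; guc_to_gt(), xe_gt_assert() and exec_queue_registered() are the names used by the surrounding code):

    static void example_assert(struct xe_guc *guc, struct xe_exec_queue *q)
    {
            struct xe_gt *gt = guc_to_gt(guc);

            /* before: xe_assert(guc_to_xe(guc), exec_queue_registered(q)); */
            xe_gt_assert(gt, exec_queue_registered(q));
    }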
if (!exec_queue_killed(q)) - xe_devcoredump(job); + xe_devcoredump(q, job, + "Timedout job - seqno=%u, lrc_seqno=%u, guc_id=%d, flags=0x%lx", + xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job), + q->guc->id, q->flags); /* * Kernel jobs should never fail, nor should VM jobs if they do @@ -1277,9 +1287,8 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg) { struct xe_exec_queue *q = msg->private_data; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); + xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT)); trace_xe_exec_queue_cleanup_entity(q); if (exec_queue_registered(q)) @@ -1315,11 +1324,10 @@ static void __suspend_fence_signal(struct xe_exec_queue *q) static void suspend_fence_signal(struct xe_exec_queue *q) { struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, exec_queue_suspended(q) || exec_queue_killed(q) || - xe_guc_read_stopped(guc)); - xe_assert(xe, q->guc->suspend_pending); + xe_gt_assert(guc_to_gt(guc), exec_queue_suspended(q) || exec_queue_killed(q) || + xe_guc_read_stopped(guc)); + xe_gt_assert(guc_to_gt(guc), q->guc->suspend_pending); __suspend_fence_signal(q); } @@ -1415,12 +1423,11 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) { struct xe_gpu_scheduler *sched; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); struct xe_guc_exec_queue *ge; long timeout; int err, i; - xe_assert(xe, xe_device_uc_enabled(guc_to_xe(guc))); + xe_gt_assert(guc_to_gt(guc), xe_device_uc_enabled(guc_to_xe(guc))); ge = kzalloc(sizeof(*ge), GFP_KERNEL); if (!ge) @@ -1633,9 +1640,8 @@ static void guc_exec_queue_resume(struct xe_exec_queue *q) struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_RESUME; struct xe_guc *guc = exec_queue_to_guc(q); - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, !q->guc->suspend_pending); + xe_gt_assert(guc_to_gt(guc), !q->guc->suspend_pending); xe_sched_msg_lock(sched); guc_exec_queue_try_add_msg(q, msg, RESUME); @@ -1708,7 +1714,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q) ban = true; } } else if (xe_exec_queue_is_lr(q) && - (xe_lrc_ring_head(q->lrc[0]) != xe_lrc_ring_tail(q->lrc[0]))) { + !xe_lrc_ring_is_idle(q->lrc[0])) { ban = true; } @@ -1747,9 +1753,8 @@ void xe_guc_submit_stop(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, xe_guc_read_stopped(guc) == 1); + xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); mutex_lock(&guc->submission_state.lock); @@ -1791,9 +1796,8 @@ int xe_guc_submit_start(struct xe_guc *guc) { struct xe_exec_queue *q; unsigned long index; - struct xe_device *xe = guc_to_xe(guc); - xe_assert(xe, xe_guc_read_stopped(guc) == 1); + xe_gt_assert(guc_to_gt(guc), xe_guc_read_stopped(guc) == 1); mutex_lock(&guc->submission_state.lock); atomic_dec(&guc->submission_state.stopped); @@ -1814,22 +1818,22 @@ int xe_guc_submit_start(struct xe_guc *guc) static struct xe_exec_queue * g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); struct xe_exec_queue *q; if (unlikely(guc_id >= GUC_ID_MAX)) { - drm_err(&xe->drm, "Invalid guc_id %u", guc_id); + xe_gt_err(gt, "Invalid guc_id %u\n", guc_id); return NULL; } q = xa_load(&guc->submission_state.exec_queue_lookup, 
guc_id); if (unlikely(!q)) { - drm_err(&xe->drm, "Not engine present for guc_id %u", guc_id); + xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id); return NULL; } - xe_assert(xe, guc_id >= q->guc->id); - xe_assert(xe, guc_id < (q->guc->id + q->width)); + xe_gt_assert(guc_to_gt(guc), guc_id >= q->guc->id); + xe_gt_assert(guc_to_gt(guc), guc_id < (q->guc->id + q->width)); return q; } @@ -1898,15 +1902,14 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q, int xe_guc_sched_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; - u32 runnable_state = msg[1]; + u32 guc_id, runnable_state; - if (unlikely(len < 2)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 2)) return -EPROTO; - } + + guc_id = msg[0]; + runnable_state = msg[1]; q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -1940,14 +1943,13 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q) int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) return -EPROTO; - } + + guc_id = msg[0]; q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -1969,14 +1971,13 @@ int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len) int xe_guc_exec_queue_reset_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) return -EPROTO; - } + + guc_id = msg[0]; q = g2h_exec_queue_lookup(guc, guc_id); if (unlikely(!q)) @@ -2016,10 +2017,8 @@ int xe_guc_error_capture_handler(struct xe_guc *guc, u32 *msg, u32 len) { u32 status; - if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) { - xe_gt_dbg(guc_to_gt(guc), "Invalid length %u", len); + if (unlikely(len != XE_GUC_ACTION_STATE_CAPTURE_NOTIFICATION_DATA_LEN)) return -EPROTO; - } status = msg[0] & XE_GUC_STATE_CAPTURE_EVENT_STATUS_MASK; if (status == XE_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE) @@ -2034,13 +2033,21 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, u32 len) { struct xe_gt *gt = guc_to_gt(guc); - struct xe_device *xe = guc_to_xe(guc); struct xe_exec_queue *q; - u32 guc_id = msg[0]; + u32 guc_id; - if (unlikely(len < 1)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len < 1)) return -EPROTO; + + guc_id = msg[0]; + + if (guc_id == GUC_ID_UNKNOWN) { + /* + * GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF + * context. In such case only PF will be notified about that fault. 
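+	 * GUC_ID_UNKNOWN is ~0 (0xffffffff), outside the valid range bounded by
+	 * GUC_ID_MAX, so it can never alias a real context id; the PF only logs
+	 * the event (rate limited) and returns success.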
+ */ + xe_gt_err_ratelimited(gt, "Memory CAT error reported by GuC!\n"); + return 0; } q = g2h_exec_queue_lookup(guc, guc_id); @@ -2062,24 +2069,22 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len) { - struct xe_device *xe = guc_to_xe(guc); + struct xe_gt *gt = guc_to_gt(guc); u8 guc_class, instance; u32 reason; - if (unlikely(len != 3)) { - drm_err(&xe->drm, "Invalid length %u", len); + if (unlikely(len != 3)) return -EPROTO; - } guc_class = msg[0]; instance = msg[1]; reason = msg[2]; /* Unexpected failure of a hardware feature, log an actual error */ - drm_err(&xe->drm, "GuC engine reset request failed on %d:%d because 0x%08X", - guc_class, instance, reason); + xe_gt_err(gt, "GuC engine reset request failed on %d:%d because 0x%08X", + guc_class, instance, reason); - xe_gt_reset_async(guc_to_gt(guc)); + xe_gt_reset_async(gt); return 0; } diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h index fa75f57bf5da..83a41ebcdc91 100644 --- a/drivers/gpu/drm/xe/xe_guc_types.h +++ b/drivers/gpu/drm/xe/xe_guc_types.h @@ -64,6 +64,15 @@ struct xe_guc { struct xe_guc_pc pc; /** @dbm: GuC Doorbell Manager */ struct xe_guc_db_mgr dbm; + + /** @g2g: GuC to GuC communication state */ + struct { + /** @g2g.bo: Storage for GuC to GuC communication channels */ + struct xe_bo *bo; + /** @g2g.owned: Is the BO owned by this GT or just mapped in */ + bool owned; + } g2g; + /** @submission_state: GuC submission state */ struct { /** @submission_state.idm: GuC context ID Manager */ @@ -79,6 +88,7 @@ struct xe_guc { /** @submission_state.fini_wq: submit fini wait queue */ wait_queue_head_t fini_wq; } submission_state; + /** @hwconfig: Hardware config state */ struct { /** @hwconfig.bo: buffer object of the hardware config */ diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c index 65b2e147c4b9..d765bfd3636b 100644 --- a/drivers/gpu/drm/xe/xe_heci_gsc.c +++ b/drivers/gpu/drm/xe/xe_heci_gsc.c @@ -92,7 +92,7 @@ void xe_heci_gsc_fini(struct xe_device *xe) { struct xe_heci_gsc *heci_gsc = &xe->heci_gsc; - if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe)) + if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi) return; if (heci_gsc->adev) { @@ -177,7 +177,7 @@ void xe_heci_gsc_init(struct xe_device *xe) const struct heci_gsc_def *def; int ret; - if (!HAS_HECI_GSCFI(xe) && !HAS_HECI_CSCFI(xe)) + if (!xe->info.has_heci_gscfi && !xe->info.has_heci_cscfi) return; heci_gsc->irq = -1; @@ -222,7 +222,7 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir) if ((iir & GSC_IRQ_INTF(1)) == 0) return; - if (!HAS_HECI_GSCFI(xe)) { + if (!xe->info.has_heci_gscfi) { drm_warn_once(&xe->drm, "GSC irq: not supported"); return; } @@ -242,7 +242,7 @@ void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir) if ((iir & CSC_IRQ_INTF(1)) == 0) return; - if (!HAS_HECI_CSCFI(xe)) { + if (!xe->info.has_heci_cscfi) { drm_warn_once(&xe->drm, "CSC irq: not supported"); return; } diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c index 1557acee3523..b19366744148 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.c +++ b/drivers/gpu/drm/xe/xe_hw_engine.c @@ -574,7 +574,6 @@ static int hw_engine_init(struct xe_gt *gt, struct xe_hw_engine *hwe, xe_gt_assert(gt, gt->info.engine_mask & BIT(id)); xe_reg_sr_apply_mmio(&hwe->reg_sr, gt); - xe_reg_sr_apply_whitelist(hwe); hwe->hwsp = xe_managed_bo_create_pin_map(xe, tile, SZ_4K, 
XE_BO_FLAG_VRAM_IF_DGFX(tile) | @@ -829,7 +828,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) /** * xe_hw_engine_snapshot_capture - Take a quick snapshot of the HW Engine. * @hwe: Xe HW Engine. - * @job: The job object. + * @q: The exec queue object. * * This can be printed out in a later stage like during dev_coredump * analysis. @@ -838,7 +837,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec) * caller, using `xe_hw_engine_snapshot_free`. */ struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job) +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q) { struct xe_hw_engine_snapshot *snapshot; struct __guc_capture_parsed_output *node; @@ -864,15 +863,14 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job if (IS_SRIOV_VF(gt_to_xe(hwe->gt))) return snapshot; - if (job) { + if (q) { /* If got guc capture, set source to GuC */ - node = xe_guc_capture_get_matching_and_lock(job); + node = xe_guc_capture_get_matching_and_lock(q); if (node) { struct xe_device *xe = gt_to_xe(hwe->gt); struct xe_devcoredump *coredump = &xe->devcoredump; coredump->snapshot.matched_node = node; - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_GUC; xe_gt_dbg(hwe->gt, "Found and locked GuC-err-capture node"); return snapshot; } @@ -880,7 +878,6 @@ xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job /* otherwise, do manual capture */ xe_engine_manual_capture(hwe, snapshot); - snapshot->source = XE_ENGINE_CAPTURE_SOURCE_MANUAL; xe_gt_dbg(hwe->gt, "Proceeding with manual engine snapshot"); return snapshot; diff --git a/drivers/gpu/drm/xe/xe_hw_engine.h b/drivers/gpu/drm/xe/xe_hw_engine.h index da0a6922a26f..6b5f9fa2a594 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine.h +++ b/drivers/gpu/drm/xe/xe_hw_engine.h @@ -11,7 +11,7 @@ struct drm_printer; struct drm_xe_engine_class_instance; struct xe_device; -struct xe_sched_job; +struct xe_exec_queue; #ifdef CONFIG_DRM_XE_JOB_TIMEOUT_MIN #define XE_HW_ENGINE_JOB_TIMEOUT_MIN CONFIG_DRM_XE_JOB_TIMEOUT_MIN @@ -56,7 +56,7 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe); u32 xe_hw_engine_mask_per_class(struct xe_gt *gt, enum xe_engine_class engine_class); struct xe_hw_engine_snapshot * -xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_sched_job *job); +xe_hw_engine_snapshot_capture(struct xe_hw_engine *hwe, struct xe_exec_queue *q); void xe_hw_engine_snapshot_free(struct xe_hw_engine_snapshot *snapshot); void xe_hw_engine_print(struct xe_hw_engine *hwe, struct drm_printer *p); void xe_hw_engine_setup_default_lrc_state(struct xe_hw_engine *hwe); diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 719f27ef00a5..e14bee95e364 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -165,8 +165,6 @@ enum xe_hw_engine_snapshot_source_id { struct xe_hw_engine_snapshot { /** @name: name of the hw engine */ char *name; - /** @source: Data source, either manual or GuC */ - enum xe_hw_engine_snapshot_source_id source; /** @hwe: hw engine */ struct xe_hw_engine *hwe; /** @logical_instance: logical instance of this hw engine */ diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c index b7995ebd54ab..1c509e66694d 100644 --- a/drivers/gpu/drm/xe/xe_irq.c +++ b/drivers/gpu/drm/xe/xe_irq.c @@ -192,7 +192,7 @@ void xe_irq_enable_hwe(struct xe_gt *gt) if 
(xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) { gsc_mask = irqs | GSC_ER_COMPLETE; heci_mask = GSC_IRQ_INTF(1); - } else if (HAS_HECI_GSCFI(xe)) { + } else if (xe->info.has_heci_gscfi) { gsc_mask = GSC_IRQ_INTF(1); } @@ -325,7 +325,7 @@ static void gt_irq_handler(struct xe_tile *tile, if (class == XE_ENGINE_CLASS_OTHER) { /* HECI GSCFI interrupts come from outside of GT */ - if (HAS_HECI_GSCFI(xe) && instance == OTHER_GSC_INSTANCE) + if (xe->info.has_heci_gscfi && instance == OTHER_GSC_INSTANCE) xe_heci_gsc_irq_handler(xe, intr_vec); else gt_other_irq_handler(engine_gt, instance, intr_vec); @@ -348,12 +348,8 @@ static irqreturn_t xelp_irq_handler(int irq, void *arg) unsigned long intr_dw[2]; u32 identity[32]; - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - spin_unlock(&xe->irq.lock); master_ctl = xelp_intr_disable(xe); if (!master_ctl) { @@ -417,12 +413,8 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) /* TODO: This really shouldn't be copied+pasted */ - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - spin_unlock(&xe->irq.lock); master_tile_ctl = dg1_intr_disable(xe); if (!master_tile_ctl) { @@ -459,7 +451,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg) * the primary tile. */ if (id == 0) { - if (HAS_HECI_CSCFI(xe)) + if (xe->info.has_heci_cscfi) xe_heci_csc_irq_handler(xe, master_ctl); xe_display_irq_handler(xe, master_ctl); gu_misc_iir = gu_misc_irq_ack(xe, master_ctl); @@ -508,7 +500,7 @@ static void gt_irq_reset(struct xe_tile *tile) if ((tile->media_gt && xe_hw_engine_mask_per_class(tile->media_gt, XE_ENGINE_CLASS_OTHER)) || - HAS_HECI_GSCFI(tile_to_xe(tile))) { + tile_to_xe(tile)->info.has_heci_gscfi) { xe_mmio_write32(mmio, GUNIT_GSC_INTR_ENABLE, 0); xe_mmio_write32(mmio, GUNIT_GSC_INTR_MASK, ~0); xe_mmio_write32(mmio, HECI2_RSVD_INTR_MASK, ~0); @@ -644,12 +636,8 @@ static irqreturn_t vf_mem_irq_handler(int irq, void *arg) struct xe_tile *tile; unsigned int id; - spin_lock(&xe->irq.lock); - if (!xe->irq.enabled) { - spin_unlock(&xe->irq.lock); + if (!atomic_read(&xe->irq.enabled)) return IRQ_NONE; - } - spin_unlock(&xe->irq.lock); for_each_tile(tile, xe, id) xe_memirq_handler(&tile->memirq); @@ -674,10 +662,9 @@ static void irq_uninstall(void *arg) struct pci_dev *pdev = to_pci_dev(xe->drm.dev); int irq; - if (!xe->irq.enabled) + if (!atomic_xchg(&xe->irq.enabled, 0)) return; - xe->irq.enabled = false; xe_irq_reset(xe); irq = pci_irq_vector(pdev, 0); @@ -724,7 +711,7 @@ int xe_irq_install(struct xe_device *xe) return err; } - xe->irq.enabled = true; + atomic_set(&xe->irq.enabled, 1); xe_irq_postinstall(xe); @@ -744,9 +731,7 @@ void xe_irq_suspend(struct xe_device *xe) { int irq = to_pci_dev(xe->drm.dev)->irq; - spin_lock_irq(&xe->irq.lock); - xe->irq.enabled = false; /* no new irqs */ - spin_unlock_irq(&xe->irq.lock); + atomic_set(&xe->irq.enabled, 0); /* no new irqs */ synchronize_irq(irq); /* flush irqs */ xe_irq_reset(xe); /* turn irqs off */ @@ -762,7 +747,7 @@ void xe_irq_resume(struct xe_device *xe) * 1. no irq will arrive before the postinstall * 2. 
display is not yet resumed */ - xe->irq.enabled = true; + atomic_set(&xe->irq.enabled, 1); xe_irq_reset(xe); xe_irq_postinstall(xe); /* turn irqs on */ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 4f64c7f4e68d..22e58c6e2a35 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -25,6 +25,7 @@ #include "xe_map.h" #include "xe_memirq.h" #include "xe_sriov.h" +#include "xe_trace_lrc.h" #include "xe_vm.h" #include "xe_wa.h" @@ -1060,6 +1061,14 @@ u32 xe_lrc_ring_tail(struct xe_lrc *lrc) return xe_lrc_read_ctx_reg(lrc, CTX_RING_TAIL) & TAIL_ADDR; } +static u32 xe_lrc_ring_start(struct xe_lrc *lrc) +{ + if (xe_lrc_has_indirect_ring_state(lrc)) + return xe_lrc_read_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START); + else + return xe_lrc_read_ctx_reg(lrc, CTX_RING_START); +} + void xe_lrc_set_ring_head(struct xe_lrc *lrc, u32 head) { if (xe_lrc_has_indirect_ring_state(lrc)) @@ -1635,10 +1644,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) xe_vm_get(lrc->bo->vm); snapshot->context_desc = xe_lrc_ggtt_addr(lrc); + snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); snapshot->tail.internal = lrc->ring.tail; snapshot->tail.memory = xe_lrc_ring_tail(lrc); + snapshot->start = xe_lrc_ring_start(lrc); snapshot->start_seqno = xe_lrc_start_seqno(lrc); snapshot->seqno = xe_lrc_seqno(lrc); snapshot->lrc_bo = xe_bo_get(lrc->bo); @@ -1692,11 +1703,14 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer return; drm_printf(p, "\tHW Context Desc: 0x%08x\n", snapshot->context_desc); + drm_printf(p, "\tHW Ring address: 0x%08x\n", + snapshot->ring_addr); drm_printf(p, "\tHW Indirect Ring State: 0x%08x\n", snapshot->indirect_context_desc); drm_printf(p, "\tLRC Head: (memory) %u\n", snapshot->head); drm_printf(p, "\tLRC Tail: (internal) %u, (memory) %u\n", snapshot->tail.internal, snapshot->tail.memory); + drm_printf(p, "\tRing start: (memory) 0x%08x\n", snapshot->start); drm_printf(p, "\tStart seqno: (memory) %d\n", snapshot->start_seqno); drm_printf(p, "\tSeqno: (memory) %d\n", snapshot->seqno); drm_printf(p, "\tTimestamp: 0x%08x\n", snapshot->ctx_timestamp); @@ -1758,5 +1772,20 @@ u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts) lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc); + trace_xe_lrc_update_timestamp(lrc, *old_ts); + return lrc->ctx_timestamp; } + +/** + * xe_lrc_ring_is_idle() - LRC is idle + * @lrc: Pointer to the lrc. + * + * Compare LRC ring head and tail to determine if idle. 
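+ * The ring is idle when the head has caught up with the tail, i.e. the
+ * hardware has consumed everything that was written into the ring.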
+ * + * Return: True is ring is idle, False otherwise + */ +bool xe_lrc_ring_is_idle(struct xe_lrc *lrc) +{ + return xe_lrc_ring_head(lrc) == xe_lrc_ring_tail(lrc); +} diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h index 40d8f6906d3e..b459dcab8787 100644 --- a/drivers/gpu/drm/xe/xe_lrc.h +++ b/drivers/gpu/drm/xe/xe_lrc.h @@ -25,8 +25,10 @@ struct xe_lrc_snapshot { unsigned long lrc_size, lrc_offset; u32 context_desc; + u32 ring_addr; u32 indirect_context_desc; u32 head; + u32 start; struct { u32 internal; u32 memory; @@ -78,6 +80,8 @@ u32 xe_lrc_ring_head(struct xe_lrc *lrc); u32 xe_lrc_ring_space(struct xe_lrc *lrc); void xe_lrc_write_ring(struct xe_lrc *lrc, const void *data, size_t size); +bool xe_lrc_ring_is_idle(struct xe_lrc *lrc); + u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc); u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc); u32 *xe_lrc_regs(struct xe_lrc *lrc); diff --git a/drivers/gpu/drm/xe/xe_macros.h b/drivers/gpu/drm/xe/xe_macros.h index daf56c846d03..8a77c2423555 100644 --- a/drivers/gpu/drm/xe/xe_macros.h +++ b/drivers/gpu/drm/xe/xe_macros.h @@ -10,9 +10,13 @@ #define XE_WARN_ON WARN_ON -#define XE_IOCTL_DBG(xe, cond) \ - ((cond) && (drm_dbg(&(xe)->drm, \ - "Ioctl argument check failed at %s:%d: %s", \ - __FILE__, __LINE__, #cond), 1)) +#define XE_IOCTL_DBG(xe, cond) ({ \ + int cond__ = !!(cond); \ + if (cond__) \ + drm_dbg(&(xe)->drm, \ + "Ioctl argument check failed at %s:%d: %s", \ + __FILE__, __LINE__, #cond); \ + cond__; \ +}) #endif diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c index f833da88150a..404fa2a456d5 100644 --- a/drivers/gpu/drm/xe/xe_memirq.c +++ b/drivers/gpu/drm/xe/xe_memirq.c @@ -155,13 +155,6 @@ static const char *guc_name(struct xe_guc *guc) * */ -static void __release_xe_bo(struct drm_device *drm, void *arg) -{ - struct xe_bo *bo = arg; - - xe_bo_unpin_map_no_vm(bo); -} - static inline bool hw_reports_to_instance_zero(struct xe_memirq *memirq) { /* @@ -184,14 +177,12 @@ static int memirq_alloc_pages(struct xe_memirq *memirq) BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_SOURCE_OFFSET(0), SZ_64)); BUILD_BUG_ON(!IS_ALIGNED(XE_MEMIRQ_STATUS_OFFSET(0), SZ_4K)); - /* XXX: convert to managed bo */ - bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size, - ttm_bo_type_kernel, - XE_BO_FLAG_SYSTEM | - XE_BO_FLAG_GGTT | - XE_BO_FLAG_GGTT_INVALIDATE | - XE_BO_FLAG_NEEDS_UC | - XE_BO_FLAG_NEEDS_CPU_ACCESS); + bo = xe_managed_bo_create_pin_map(xe, tile, bo_size, + XE_BO_FLAG_SYSTEM | + XE_BO_FLAG_GGTT | + XE_BO_FLAG_GGTT_INVALIDATE | + XE_BO_FLAG_NEEDS_UC | + XE_BO_FLAG_NEEDS_CPU_ACCESS); if (IS_ERR(bo)) { err = PTR_ERR(bo); goto out; @@ -215,7 +206,7 @@ static int memirq_alloc_pages(struct xe_memirq *memirq) xe_bo_ggtt_addr(bo), bo_size, XE_MEMIRQ_SOURCE_OFFSET(0), XE_MEMIRQ_STATUS_OFFSET(0)); - return drmm_add_action_or_reset(&xe->drm, __release_xe_bo, memirq->bo); + return 0; out: memirq_err(memirq, "Failed to allocate memirq page (%pe)\n", ERR_PTR(err)); @@ -442,6 +433,9 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name)) xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST); + + if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name)) + xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0); } /** diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c index bfc3deebdaa2..07b27114be9a 100644 --- a/drivers/gpu/drm/xe/xe_module.c +++ b/drivers/gpu/drm/xe/xe_module.c @@ -19,7 +19,7 @@ struct xe_modparam 
xe_modparam = { .probe_display = true, - .guc_log_level = 5, + .guc_log_level = 3, .force_probe = CONFIG_DRM_XE_FORCE_PROBE, .wedged_mode = 1, /* the rest are 0 by default */ diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 8dd55798ab31..ec88b18e9baa 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -96,6 +96,7 @@ struct xe_oa_open_param { struct drm_xe_sync __user *syncs_user; int num_syncs; struct xe_sync_entry *syncs; + size_t oa_buffer_size; }; struct xe_oa_config_bo { @@ -403,11 +404,19 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { - struct xe_mmio *mmio = &stream->gt->mmio; u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + int size_exponent = __ffs(stream->oa_buffer.bo->size); + u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; + struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; + /* + * If oa buffer size is more than 16MB (exponent greater than 24), the + * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set. + */ + oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK, + size_exponent > 24 ? size_exponent - 20 : size_exponent - 17); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0); @@ -901,15 +910,12 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_file_put(stream->xef); } -static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) +static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { struct xe_bo *bo; - BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); - BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, + size, ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -1087,6 +1093,13 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); } +static u32 oag_buf_size_select(const struct xe_oa_stream *stream) +{ + return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, + stream->oa_buffer.bo->size > SZ_16M ? + OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); +} + static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) { struct xe_mmio *mmio = &stream->gt->mmio; @@ -1119,6 +1132,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, _MASKED_BIT_ENABLE(oa_debug) | oag_report_ctx_switches(stream) | + oag_buf_size_select(stream) | oag_configure_mmio_trigger(stream, true)); xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? 
@@ -1260,6 +1274,17 @@ static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value, return 0; } +static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) { + drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value); + return -EINVAL; + } + param->oa_buffer_size = value; + return 0; +} + static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, struct xe_oa_open_param *param) { @@ -1280,6 +1305,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, }; static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { @@ -1294,6 +1320,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, }; static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, @@ -1553,7 +1580,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1639,7 +1666,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { + if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1783,9 +1810,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, if (GRAPHICS_VER(stream->oa->xe) >= 20 && stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) stream->oa_buffer.circ_size = - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; + param->oa_buffer_size - + param->oa_buffer_size % stream->oa_buffer.format->size; else - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; + stream->oa_buffer.circ_size = param->oa_buffer_size; if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { /* If we don't find the context offset, just return error */ @@ -1828,7 +1856,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, goto err_fw_put; } - ret = xe_oa_alloc_oa_buffer(stream); + ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size); if (ret) goto err_fw_put; @@ -2125,6 +2153,9 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); } + if (!param.oa_buffer_size) + param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; + ret = xe_oa_parse_syncs(oa, ¶m); if (ret) goto err_exec_q; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index fea9d981e414..df7793915628 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -15,7 
+15,7 @@ #include "regs/xe_reg_defs.h" #include "xe_hw_engine_types.h" -#define XE_OA_BUFFER_SIZE SZ_16M +#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M enum xe_oa_report_header { HDR_32_BIT = 0, diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index 96f3e16a5b7d..a6761cb769b2 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -738,9 +738,6 @@ void xe_pm_d3cold_allowed_toggle(struct xe_device *xe) xe->d3cold.allowed = false; mutex_unlock(&xe->d3cold.lock); - - drm_dbg(&xe->drm, - "d3cold: allowed=%s\n", str_yes_no(xe->d3cold.allowed)); } /** diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index f27f579f4d85..65c3c1688710 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -136,6 +136,7 @@ err_kfree: xe_pt_free(pt); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO); /** * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero @@ -1333,8 +1334,7 @@ static void invalidation_fence_cb(struct dma_fence *fence, queue_work(system_wq, &ifence->work); } else { ifence->base.base.error = ifence->fence->error; - dma_fence_signal(&ifence->base.base); - dma_fence_put(&ifence->base.base); + xe_gt_tlb_invalidation_fence_signal(&ifence->base); } dma_fence_put(ifence->fence); } @@ -1851,6 +1851,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops) return 0; } +ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO); static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile, struct xe_vm_pgtable_update_ops *pt_update_ops, @@ -2131,6 +2132,7 @@ kill_vm_tile1: return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO); /** * xe_pt_update_ops_fini() - Finish PT update operations diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 170ae72d1a7b..d2a816f71bf2 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -23,6 +23,7 @@ #include "xe_guc_hwconfig.h" #include "xe_macros.h" #include "xe_mmio.h" +#include "xe_oa.h" #include "xe_ttm_vram_mgr.h" #include "xe_wa.h" @@ -670,7 +671,8 @@ static int query_oa_units(struct xe_device *xe, du->oa_unit_id = u->oa_unit_id; du->oa_unit_type = u->type; du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); - du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; + du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | + DRM_XE_OA_CAPS_OA_BUFFER_SIZE; j = 0; for_each_hw_engine(hwe, gt, hwe_id) { diff --git a/drivers/gpu/drm/xe/xe_reg_sr.c b/drivers/gpu/drm/xe/xe_reg_sr.c index e1a0e27cda14..9475e3f74958 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr.c +++ b/drivers/gpu/drm/xe/xe_reg_sr.c @@ -24,49 +24,29 @@ #include "xe_hw_engine_types.h" #include "xe_macros.h" #include "xe_mmio.h" -#include "xe_reg_whitelist.h" #include "xe_rtp_types.h" -#define XE_REG_SR_GROW_STEP_DEFAULT 16 - static void reg_sr_fini(struct drm_device *drm, void *arg) { struct xe_reg_sr *sr = arg; + struct xe_reg_sr_entry *entry; + unsigned long reg; + + xa_for_each(&sr->xa, reg, entry) + kfree(entry); xa_destroy(&sr->xa); - kfree(sr->pool.arr); - memset(&sr->pool, 0, sizeof(sr->pool)); } int xe_reg_sr_init(struct xe_reg_sr *sr, const char *name, struct xe_device *xe) { xa_init(&sr->xa); - memset(&sr->pool, 0, sizeof(sr->pool)); - sr->pool.grow_step = XE_REG_SR_GROW_STEP_DEFAULT; sr->name = name; return drmm_add_action_or_reset(&xe->drm, reg_sr_fini, sr); } EXPORT_SYMBOL_IF_KUNIT(xe_reg_sr_init); -static struct xe_reg_sr_entry *alloc_entry(struct xe_reg_sr *sr) -{ - if 
(sr->pool.used == sr->pool.allocated) { - struct xe_reg_sr_entry *arr; - - arr = krealloc_array(sr->pool.arr, - ALIGN(sr->pool.allocated + 1, sr->pool.grow_step), - sizeof(*arr), GFP_KERNEL); - if (!arr) - return NULL; - - sr->pool.arr = arr; - sr->pool.allocated += sr->pool.grow_step; - } - - return &sr->pool.arr[sr->pool.used++]; -} - static bool compatible_entries(const struct xe_reg_sr_entry *e1, const struct xe_reg_sr_entry *e2) { @@ -112,7 +92,7 @@ int xe_reg_sr_add(struct xe_reg_sr *sr, return 0; } - pentry = alloc_entry(sr); + pentry = kmalloc(sizeof(*pentry), GFP_KERNEL); if (!pentry) { ret = -ENOMEM; goto fail; @@ -211,58 +191,6 @@ err_force_wake: xe_gt_err(gt, "Failed to apply, err=-ETIMEDOUT\n"); } -void xe_reg_sr_apply_whitelist(struct xe_hw_engine *hwe) -{ - struct xe_reg_sr *sr = &hwe->reg_whitelist; - struct xe_gt *gt = hwe->gt; - struct xe_device *xe = gt_to_xe(gt); - struct xe_reg_sr_entry *entry; - struct drm_printer p; - u32 mmio_base = hwe->mmio_base; - unsigned long reg; - unsigned int slot = 0; - unsigned int fw_ref; - - if (xa_empty(&sr->xa)) - return; - - drm_dbg(&xe->drm, "Whitelisting %s registers\n", sr->name); - - fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); - if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) - goto err_force_wake; - - p = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL); - xa_for_each(&sr->xa, reg, entry) { - if (slot == RING_MAX_NONPRIV_SLOTS) { - xe_gt_err(gt, - "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n", - hwe->name, RING_MAX_NONPRIV_SLOTS); - break; - } - - xe_reg_whitelist_print_entry(&p, 0, reg, entry); - xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), - reg | entry->set_bits); - slot++; - } - - /* And clear the rest just in case of garbage */ - for (; slot < RING_MAX_NONPRIV_SLOTS; slot++) { - u32 addr = RING_NOPID(mmio_base).addr; - - xe_mmio_write32(>->mmio, RING_FORCE_TO_NONPRIV(mmio_base, slot), addr); - } - - xe_force_wake_put(gt_to_fw(gt), fw_ref); - - return; - -err_force_wake: - xe_force_wake_put(gt_to_fw(gt), fw_ref); - drm_err(&xe->drm, "Failed to apply, err=-ETIMEDOUT\n"); -} - /** * xe_reg_sr_dump - print all save/restore entries * @sr: Save/restore entries diff --git a/drivers/gpu/drm/xe/xe_reg_sr_types.h b/drivers/gpu/drm/xe/xe_reg_sr_types.h index ad48a52b824a..ebe11f237fa2 100644 --- a/drivers/gpu/drm/xe/xe_reg_sr_types.h +++ b/drivers/gpu/drm/xe/xe_reg_sr_types.h @@ -20,12 +20,6 @@ struct xe_reg_sr_entry { }; struct xe_reg_sr { - struct { - struct xe_reg_sr_entry *arr; - unsigned int used; - unsigned int allocated; - unsigned int grow_step; - } pool; struct xarray xa; const char *name; diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c index 3996934974fa..edab5d4e3ba5 100644 --- a/drivers/gpu/drm/xe/xe_reg_whitelist.c +++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c @@ -10,7 +10,9 @@ #include "regs/xe_oa_regs.h" #include "regs/xe_regs.h" #include "xe_gt_types.h" +#include "xe_gt_printk.h" #include "xe_platform_types.h" +#include "xe_reg_sr.h" #include "xe_rtp.h" #include "xe_step.h" @@ -89,6 +91,40 @@ static const struct xe_rtp_entry_sr register_whitelist[] = { {} }; +static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe) +{ + struct xe_reg_sr *sr = &hwe->reg_whitelist; + struct xe_reg_sr_entry *entry; + struct drm_printer p; + unsigned long reg; + unsigned int slot; + + xe_gt_dbg(hwe->gt, "Add %s whitelist to engine\n", sr->name); + p = xe_gt_dbg_printer(hwe->gt); + + slot = 0; + xa_for_each(&sr->xa, 
reg, entry) { + struct xe_reg_sr_entry hwe_entry = { + .reg = RING_FORCE_TO_NONPRIV(hwe->mmio_base, slot), + .set_bits = entry->reg.addr | entry->set_bits, + .clr_bits = ~0u, + .read_mask = entry->read_mask, + }; + + if (slot == RING_MAX_NONPRIV_SLOTS) { + xe_gt_err(hwe->gt, + "hwe %s: maximum register whitelist slots (%d) reached, refusing to add more\n", + hwe->name, RING_MAX_NONPRIV_SLOTS); + break; + } + + xe_reg_whitelist_print_entry(&p, 0, reg, entry); + xe_reg_sr_add(&hwe->reg_sr, &hwe_entry, hwe->gt); + + slot++; + } +} + /** * xe_reg_whitelist_process_engine - process table of registers to whitelist * @hwe: engine instance to process whitelist for @@ -102,6 +138,7 @@ void xe_reg_whitelist_process_engine(struct xe_hw_engine *hwe) struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(hwe); xe_rtp_process_to_sr(&ctx, register_whitelist, &hwe->reg_whitelist); + whitelist_apply_to_hwe(hwe); } /** diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c index ef10782af656..04e2f539ccd9 100644 --- a/drivers/gpu/drm/xe/xe_sriov.c +++ b/drivers/gpu/drm/xe/xe_sriov.c @@ -14,6 +14,7 @@ #include "xe_mmio.h" #include "xe_sriov.h" #include "xe_sriov_pf.h" +#include "xe_sriov_vf.h" /** * xe_sriov_mode_to_string - Convert enum value to string. @@ -114,6 +115,9 @@ int xe_sriov_init(struct xe_device *xe) return err; } + if (IS_SRIOV_VF(xe)) + xe_sriov_vf_init_early(xe); + xe_assert(xe, !xe->sriov.wq); xe->sriov.wq = alloc_workqueue("xe-sriov-wq", 0, 0); if (!xe->sriov.wq) diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h index 7d156ba82479..dd1df950b021 100644 --- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h +++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h @@ -20,7 +20,7 @@ * is within a range of supported VF numbers (up to maximum number of VFs that * driver can support, including VF0 that represents the PF itself). * - * Note: Effective only on debug builds. See `Xe ASSERTs`_ for more information. + * Note: Effective only on debug builds. See `Xe Asserts`_ for more information. */ #define xe_sriov_pf_assert_vfid(xe, vfid) \ xe_assert((xe), (vfid) <= xe_sriov_pf_get_totalvfs(xe)) diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h index c7b7ad4af5c8..ca94382a721e 100644 --- a/drivers/gpu/drm/xe/xe_sriov_types.h +++ b/drivers/gpu/drm/xe/xe_sriov_types.h @@ -9,6 +9,7 @@ #include <linux/build_bug.h> #include <linux/mutex.h> #include <linux/types.h> +#include <linux/workqueue_types.h> /** * VFID - Virtual Function Identifier @@ -56,4 +57,20 @@ struct xe_device_pf { struct mutex master_lock; }; +/** + * struct xe_device_vf - Xe Virtual Function related data + * + * The data in this structure is valid only if driver is running in the + * @XE_SRIOV_MODE_VF mode. 
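+ * The structure is reached as xe->sriov.vf in the VF-only code paths below.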
+ */ +struct xe_device_vf { + /** @migration: VF Migration state data */ + struct { + /** @migration.worker: VF migration recovery worker */ + struct work_struct worker; + /** @migration.gt_flags: Per-GT request flags for VF migration recovery */ + unsigned long gt_flags; + } migration; +}; + #endif diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c new file mode 100644 index 000000000000..c1275e64aa9c --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include <drm/drm_managed.h> + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_gt_sriov_printk.h" +#include "xe_gt_sriov_vf.h" +#include "xe_pm.h" +#include "xe_sriov.h" +#include "xe_sriov_printk.h" +#include "xe_sriov_vf.h" + +/** + * DOC: VF restore procedure in PF KMD and VF KMD + * + * Restoring previously saved state of a VF is one of core features of + * SR-IOV. All major VM Management applications allow saving and restoring + * the VM state, and doing that to a VM which uses SRIOV VF as one of + * the accessible devices requires support from KMD on both PF and VF side. + * VMM initiates all required operations through VFIO module, which then + * translates them into PF KMD calls. This description will focus on these + * calls, leaving out the module which initiates these steps (VFIO). + * + * In order to start the restore procedure, GuC needs to keep the VF in + * proper state. The PF driver can ensure GuC set it to VF_READY state + * by provisioning the VF, which in turn can be done after Function Level + * Reset of said VF (or after it was freshly created - in that case FLR + * is not needed). The FLR procedure ends with GuC sending message + * `GUC_PF_NOTIFY_VF_FLR_DONE`, and then provisioning data is sent to GuC. + * After the provisioning is completed, the VF needs to be paused, and + * at that point the actual restore can begin. + * + * During VF Restore, state of several resources is restored. These may + * include local memory content (system memory is restored by VMM itself), + * values of MMIO registers, stateless compression metadata and others. + * The final resource which also needs restoring is state of the VF + * submission maintained within GuC. For that, `GUC_PF_OPCODE_VF_RESTORE` + * message is used, with reference to the state blob to be consumed by + * GuC. + * + * Next, when VFIO is asked to set the VM into running state, the PF driver + * sends `GUC_PF_TRIGGER_VF_RESUME` to GuC. When sent after restore, this + * changes VF state within GuC to `VF_RESFIX_BLOCKED` rather than the + * usual `VF_RUNNING`. At this point GuC triggers an interrupt to inform + * the VF KMD within the VM that it was migrated. + * + * As soon as Virtual GPU of the VM starts, the VF driver within receives + * the MIGRATED interrupt and schedules post-migration recovery worker. + * That worker queries GuC for new provisioning (using MMIO communication), + * and applies fixups to any non-virtualized resources used by the VF. 
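+ * GGTT references are the main example of such a resource: the GGTT range is
+ * not virtualized per-VF, so it may change across the migration and has to be
+ * re-queried from GuC (see vf_post_migration_requery_guc() below).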
+ * + * When the VF driver is ready to continue operation on the newly connected + * hardware, it sends `VF2GUC_NOTIFY_RESFIX_DONE` which causes it to + * enter the long awaited `VF_RUNNING` state, and therefore start handling + * CTB messages and scheduling workloads from the VF:: + * + * PF GuC VF + * [ ] | | + * [ ] PF2GUC_VF_CONTROL(pause) | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_READY_PAUSED | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] success [ ] | + * [ ] <---------------------------[ ] | + * [ ] | | + * [ ] PF loads resources from the | | + * [ ]------- saved image supplied | | + * [ ] | | | + * [ ] <----- | | + * [ ] | | + * [ ] GUC_PF_OPCODE_VF_RESTORE | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC loads contexts and CTB | + * [ ] [ ]------- state from image | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_RESFIX_PAUSED | + * [ ] [ ] | | + * [ ] success [ ] <----- | + * [ ] <---------------------------[ ] | + * [ ] | | + * [ ] GUC_PF_TRIGGER_VF_RESUME | | + * [ ]---------------------------> [ ] | + * [ ] [ ] GuC sets new VF state to | + * [ ] [ ]------- VF_RESFIX_BLOCKED | + * [ ] [ ] | | + * [ ] [ ] <----- | + * [ ] [ ] | + * [ ] [ ] GUC_INTR_SW_INT_0 | + * [ ] success [ ]---------------------------> [ ] + * [ ] <---------------------------[ ] [ ] + * | | VF2GUC_QUERY_SINGLE_KLV [ ] + * | [ ] <---------------------------[ ] + * | [ ] [ ] + * | [ ] new VF provisioning [ ] + * | [ ]---------------------------> [ ] + * | | [ ] + * | | VF driver applies post [ ] + * | | migration fixups -------[ ] + * | | | [ ] + * | | -----> [ ] + * | | [ ] + * | | VF2GUC_NOTIFY_RESFIX_DONE [ ] + * | [ ] <---------------------------[ ] + * | [ ] [ ] + * | [ ] GuC sets new VF state to [ ] + * | [ ]------- VF_RUNNING [ ] + * | [ ] | [ ] + * | [ ] <----- [ ] + * | [ ] success [ ] + * | [ ]---------------------------> [ ] + * | | | + * | | | + */ + +static void migration_worker_func(struct work_struct *w); + +/** + * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data. + * @xe: the &xe_device to initialize + */ +void xe_sriov_vf_init_early(struct xe_device *xe) +{ + INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func); +} + +/** + * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning. + * @xe: the &xe_device struct instance + * + * After migration, we need to re-query all VF configuration to make sure + * they match previous provisioning. Note that most of VF provisioning + * shall be the same, except GGTT range, since GGTT is not virtualized per-VF. + * + * Returns: 0 if the operation completed successfully, or a negative error + * code otherwise. + */ +static int vf_post_migration_requery_guc(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + int err, ret = 0; + + for_each_gt(gt, xe, id) { + err = xe_gt_sriov_vf_query_config(gt); + ret = ret ?: err; + } + + return ret; +} + +/* + * vf_post_migration_imminent - Check if post-restore recovery is coming. + * @xe: the &xe_device struct instance + * + * Return: True if migration recovery worker will soon be running. Any worker currently + * executing does not affect the result. + */ +static bool vf_post_migration_imminent(struct xe_device *xe) +{ + return xe->sriov.vf.migration.gt_flags != 0 || + work_pending(&xe->sriov.vf.migration.worker); +} + +/* + * Notify all GuCs about resource fixups apply finished. 
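+ * The notification is skipped when another migration event is already pending,
+ * since the whole recovery sequence will be re-run for it anyway.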
+ */ +static void vf_post_migration_notify_resfix_done(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) { + if (vf_post_migration_imminent(xe)) + goto skip; + xe_gt_sriov_vf_notify_resfix_done(gt); + } + return; + +skip: + drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n"); +} + +static void vf_post_migration_recovery(struct xe_device *xe) +{ + int err; + + drm_dbg(&xe->drm, "migration recovery in progress\n"); + xe_pm_runtime_get(xe); + err = vf_post_migration_requery_guc(xe); + if (vf_post_migration_imminent(xe)) + goto defer; + if (unlikely(err)) + goto fail; + + /* FIXME: add the recovery steps */ + vf_post_migration_notify_resfix_done(xe); + xe_pm_runtime_put(xe); + drm_notice(&xe->drm, "migration recovery ended\n"); + return; +defer: + xe_pm_runtime_put(xe); + drm_dbg(&xe->drm, "migration recovery deferred\n"); + return; +fail: + xe_pm_runtime_put(xe); + drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err)); + xe_device_declare_wedged(xe); +} + +static void migration_worker_func(struct work_struct *w) +{ + struct xe_device *xe = container_of(w, struct xe_device, + sriov.vf.migration.worker); + + vf_post_migration_recovery(xe); +} + +static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe) +{ + struct xe_gt *gt; + unsigned int id; + + for_each_gt(gt, xe, id) { + if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) { + xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n"); + return false; + } + } + return true; +} + +/** + * xe_sriov_vf_start_migration_recovery - Start VF migration recovery. + * @xe: the &xe_device to start recovery on + * + * This function shall be called only by VF. + */ +void xe_sriov_vf_start_migration_recovery(struct xe_device *xe) +{ + bool started; + + xe_assert(xe, IS_SRIOV_VF(xe)); + + if (!vf_ready_to_recovery_on_all_gts(xe)) + return; + + WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0); + /* Ensure other threads see that no flags are set now. */ + smp_mb(); + + started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker); + drm_info(&xe->drm, "VF migration recovery %s\n", started ? 
+ "scheduled" : "already in progress"); +} diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h new file mode 100644 index 000000000000..7b8622cff2b7 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_sriov_vf.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_SRIOV_VF_H_ +#define _XE_SRIOV_VF_H_ + +struct xe_device; + +void xe_sriov_vf_init_early(struct xe_device *xe); +void xe_sriov_vf_start_migration_recovery(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index 91130ad8999c..d5281de04d54 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -211,6 +211,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, __string(dev, __dev_name_eq(job->q)) __field(u32, seqno) __field(u32, lrc_seqno) + __field(u8, gt_id) __field(u16, guc_id) __field(u32, guc_state) __field(u32, flags) @@ -223,6 +224,7 @@ DECLARE_EVENT_CLASS(xe_sched_job, __assign_str(dev); __entry->seqno = xe_sched_job_seqno(job); __entry->lrc_seqno = xe_sched_job_lrc_seqno(job); + __entry->gt_id = job->q->gt->info.id; __entry->guc_id = job->q->guc->id; __entry->guc_state = atomic_read(&job->q->guc->state); @@ -232,9 +234,9 @@ DECLARE_EVENT_CLASS(xe_sched_job, __entry->batch_addr = (u64)job->ptrs[0].batch_addr; ), - TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", + TP_printk("dev=%s, fence=%p, seqno=%u, lrc_seqno=%u, gt=%u, guc_id=%d, batch_addr=0x%012llx, guc_state=0x%x, flags=0x%x, error=%d", __get_str(dev), __entry->fence, __entry->seqno, - __entry->lrc_seqno, __entry->guc_id, + __entry->lrc_seqno, __entry->gt_id, __entry->guc_id, __entry->batch_addr, __entry->guc_state, __entry->flags, __entry->error) ); @@ -282,6 +284,7 @@ DECLARE_EVENT_CLASS(xe_sched_msg, __string(dev, __dev_name_eq(((struct xe_exec_queue *)msg->private_data))) __field(u32, opcode) __field(u16, guc_id) + __field(u8, gt_id) ), TP_fast_assign( @@ -289,9 +292,11 @@ DECLARE_EVENT_CLASS(xe_sched_msg, __entry->opcode = msg->opcode; __entry->guc_id = ((struct xe_exec_queue *)msg->private_data)->guc->id; + __entry->gt_id = + ((struct xe_exec_queue *)msg->private_data)->gt->info.id; ), - TP_printk("dev=%s, guc_id=%d, opcode=%u", __get_str(dev), __entry->guc_id, + TP_printk("dev=%s, gt=%u guc_id=%d, opcode=%u", __get_str(dev), __entry->gt_id, __entry->guc_id, __entry->opcode) ); diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h index 30a3cfbaaa09..1762dd30ba6d 100644 --- a/drivers/gpu/drm/xe/xe_trace_bo.h +++ b/drivers/gpu/drm/xe/xe_trace_bo.h @@ -48,6 +48,11 @@ DEFINE_EVENT(xe_bo, xe_bo_cpu_fault, TP_ARGS(bo) ); +DEFINE_EVENT(xe_bo, xe_bo_validate, + TP_PROTO(struct xe_bo *bo), + TP_ARGS(bo) +); + TRACE_EVENT(xe_bo_move, TP_PROTO(struct xe_bo *bo, uint32_t new_placement, uint32_t old_placement, bool move_lacks_source), diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.c b/drivers/gpu/drm/xe/xe_trace_lrc.c new file mode 100644 index 000000000000..ab9b7e2970bc --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_lrc.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright © 2024 Intel Corporation + */ + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "xe_trace_lrc.h" +#endif diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h new file mode 100644 index 000000000000..5c669a0b2180 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_trace_lrc.h @@ -0,0 +1,52 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2024 Intel Corporation + */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xe + +#if !defined(_XE_TRACE_LRC_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _XE_TRACE_LRC_H_ + +#include <linux/tracepoint.h> +#include <linux/types.h> + +#include "xe_gt_types.h" +#include "xe_lrc.h" +#include "xe_lrc_types.h" + +#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev) + +TRACE_EVENT(xe_lrc_update_timestamp, + TP_PROTO(struct xe_lrc *lrc, uint32_t old), + TP_ARGS(lrc, old), + TP_STRUCT__entry( + __field(struct xe_lrc *, lrc) + __field(u32, old) + __field(u32, new) + __string(name, lrc->fence_ctx.name) + __string(device_id, __dev_name_lrc(lrc)) + ), + + TP_fast_assign( + __entry->lrc = lrc; + __entry->old = old; + __entry->new = lrc->ctx_timestamp; + __assign_str(name); + __assign_str(device_id); + ), + TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s", + __entry->lrc, __get_str(name), + __entry->old, __entry->new, + __get_str(device_id)) +); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/xe +#define TRACE_INCLUDE_FILE xe_trace_lrc +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index 423b261ea743..c95728c45ea4 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -52,7 +52,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, struct xe_ttm_vram_mgr *mgr = to_xe_ttm_vram_mgr(man); struct xe_ttm_vram_mgr_resource *vres; struct drm_buddy *mm = &mgr->mm; - u64 size, remaining_size, min_page_size; + u64 size, min_page_size; unsigned long lpfn; int err; @@ -98,17 +98,6 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, goto error_fini; } - if (WARN_ON(min_page_size > SZ_2G)) { /* FIXME: sg limit */ - err = -EINVAL; - goto error_fini; - } - - if (WARN_ON((size > SZ_2G && - (vres->base.placement & TTM_PL_FLAG_CONTIGUOUS)))) { - err = -EINVAL; - goto error_fini; - } - if (WARN_ON(!IS_ALIGNED(size, min_page_size))) { err = -EINVAL; goto error_fini; @@ -116,12 +105,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, mutex_lock(&mgr->lock); if (lpfn <= mgr->visible_size >> PAGE_SHIFT && size > mgr->visible_avail) { - mutex_unlock(&mgr->lock); err = -ENOSPC; - goto error_fini; + goto error_unlock; } - if (place->fpfn + (size >> PAGE_SHIFT) != place->lpfn && + if (place->fpfn + (size >> PAGE_SHIFT) != lpfn && place->flags & TTM_PL_FLAG_CONTIGUOUS) { size = roundup_pow_of_two(size); min_page_size = size; @@ -129,25 +117,11 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, lpfn = max_t(unsigned long, place->fpfn + (size >> PAGE_SHIFT), lpfn); } - remaining_size = size; - do { - /* - * Limit maximum size to 2GiB due to SG table limitations. - * FIXME: Should maybe be handled as part of sg construction. 
- */ - u64 alloc_size = min_t(u64, remaining_size, SZ_2G); - - err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, - (u64)lpfn << PAGE_SHIFT, - alloc_size, - min_page_size, - &vres->blocks, - vres->flags); - if (err) - goto error_free_blocks; - - remaining_size -= alloc_size; - } while (remaining_size); + err = drm_buddy_alloc_blocks(mm, (u64)place->fpfn << PAGE_SHIFT, + (u64)lpfn << PAGE_SHIFT, size, + min_page_size, &vres->blocks, vres->flags); + if (err) + goto error_unlock; if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) @@ -194,9 +168,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, *res = &vres->base; return 0; - -error_free_blocks: - drm_buddy_free_list(mm, &vres->blocks, 0); +error_unlock: mutex_unlock(&mgr->lock); error_fini: ttm_resource_fini(man, &vres->base); @@ -393,7 +365,8 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, xe_res_first(res, offset, length, &cursor); while (cursor.remaining) { num_entries++; - xe_res_next(&cursor, cursor.size); + /* Limit maximum size to 2GiB due to SG table limitations. */ + xe_res_next(&cursor, min_t(u64, cursor.size, SZ_2G)); } r = sg_alloc_table(*sgt, num_entries, GFP_KERNEL); @@ -413,7 +386,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, xe_res_first(res, offset, length, &cursor); for_each_sgtable_sg((*sgt), sg, i) { phys_addr_t phys = cursor.start + tile->mem.vram.io_start; - size_t size = cursor.size; + size_t size = min_t(u64, cursor.size, SZ_2G); dma_addr_t addr; addr = dma_map_resource(dev, phys, size, dir, @@ -426,7 +399,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe, sg_dma_address(sg) = addr; sg_dma_len(sg) = size; - xe_res_next(&cursor, cursor.size); + xe_res_next(&cursor, size); } return 0; diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index c99380271de6..b4a44e1ea167 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -10,7 +10,6 @@ #include <drm/drm_exec.h> #include <drm/drm_print.h> -#include <drm/ttm/ttm_execbuf_util.h> #include <drm/ttm/ttm_tt.h> #include <uapi/drm/xe_drm.h> #include <linux/ascii85.h> @@ -733,13 +732,14 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) vops->pt_update_ops[i].ops = kmalloc_array(vops->pt_update_ops[i].num_ops, sizeof(*vops->pt_update_ops[i].ops), - GFP_KERNEL); + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!vops->pt_update_ops[i].ops) return array_of_binds ? 
-ENOBUFS : -ENOMEM; } return 0; } +ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); static void xe_vma_ops_fini(struct xe_vma_ops *vops) { @@ -1352,6 +1352,7 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, return 0; } +ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); static void xe_vm_free_scratch(struct xe_vm *vm) { @@ -1978,6 +1979,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo, return ops; } +ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, u16 pat_index, unsigned int flags) @@ -2357,13 +2359,15 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, bool validate) { struct xe_bo *bo = xe_vma_bo(vma); + struct xe_vm *vm = xe_vma_vm(vma); int err = 0; if (bo) { if (!bo->vm) err = drm_exec_lock_obj(exec, &bo->ttm.base); if (!err && validate) - err = xe_bo_validate(bo, xe_vma_vm(vma), true); + err = xe_bo_validate(bo, vm, + !xe_vm_in_preempt_fence_mode(vm)); } return err; @@ -2697,6 +2701,7 @@ unlock: drm_exec_fini(&exec); return err; } +ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); #define SUPPORTED_FLAGS_STUB \ (DRM_XE_VM_BIND_FLAG_READONLY | \ @@ -2733,7 +2738,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, *bind_ops = kvmalloc_array(args->num_binds, sizeof(struct drm_xe_vm_bind_op), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!*bind_ops) return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; @@ -2973,14 +2979,16 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (args->num_binds) { bos = kvcalloc(args->num_binds, sizeof(*bos), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!bos) { err = -ENOMEM; goto release_vm_lock; } ops = kvcalloc(args->num_binds, sizeof(*ops), - GFP_KERNEL | __GFP_ACCOUNT); + GFP_KERNEL | __GFP_ACCOUNT | + __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (!ops) { err = -ENOMEM; goto release_vm_lock; @@ -3303,7 +3311,6 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) for (int i = 0; i < snap->num_snaps; i++) { struct xe_bo *bo = snap->snap[i].bo; - struct iosys_map src; int err; if (IS_ERR(snap->snap[i].data)) @@ -3316,16 +3323,8 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) } if (bo) { - xe_bo_lock(bo, false); - err = ttm_bo_vmap(&bo->ttm, &src); - if (!err) { - xe_map_memcpy_from(xe_bo_device(bo), - snap->snap[i].data, - &src, snap->snap[i].bo_ofs, - snap->snap[i].len); - ttm_bo_vunmap(&bo->ttm, &src); - } - xe_bo_unlock(bo); + err = xe_bo_read(bo, snap->snap[i].bo_ofs, + snap->snap[i].data, snap->snap[i].len); } else { void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h index c864dba35e1d..23adb7442881 100644 --- a/drivers/gpu/drm/xe/xe_vm.h +++ b/drivers/gpu/drm/xe/xe_vm.h @@ -17,7 +17,6 @@ struct drm_printer; struct drm_file; struct ttm_buffer_object; -struct ttm_validate_buffer; struct xe_exec_queue; struct xe_file; diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h index 4d33f310b653..078786958403 100644 --- a/drivers/gpu/drm/xe/xe_vm_doc.h +++ b/drivers/gpu/drm/xe/xe_vm_doc.h @@ -64,8 +64,8 @@ * update page level 2 PDE[1] to page level 3b phys address (GPU) * * bind BO2 0x1ff000-0x201000 - * update page level 3a PTE[511] to BO2 phys addres (GPU) - * update page level 3b PTE[0] to BO2 phys 
addres + 0x1000 (GPU) + * update page level 3a PTE[511] to BO2 phys address (GPU) + * update page level 3b PTE[0] to BO2 phys address + 0x1000 (GPU) * * GPU bypass * ~~~~~~~~~~ @@ -192,7 +192,7 @@ * * If a VM is in fault mode (TODO: link to fault mode), new bind operations that * create mappings are by default deferred to the page fault handler (first - * use). This behavior can be overriden by setting the flag + * use). This behavior can be overridden by setting the flag * DRM_XE_VM_BIND_FLAG_IMMEDIATE which indicates to creating the mapping * immediately. * @@ -209,7 +209,7 @@ * * Since this a core kernel managed memory the kernel can move this memory * whenever it wants. We register an invalidation MMU notifier to alert XE when - * a user poiter is about to move. The invalidation notifier needs to block + * a user pointer is about to move. The invalidation notifier needs to block * until all pending users (jobs or compute mode engines) of the userptr are * idle to ensure no faults. This done by waiting on all of VM's dma-resv slots. * @@ -252,7 +252,7 @@ * Rebind worker * ------------- * - * The rebind worker is very similar to an exec. It is resposible for rebinding + * The rebind worker is very similar to an exec. It is responsible for rebinding * evicted BOs or userptrs, waiting on those operations, installing new preempt * fences, and finally resuming executing of engines in the VM. * @@ -317,11 +317,11 @@ * are not allowed, only long running workloads and ULLS are enabled on a faulting * VM. * - * Defered VM binds + * Deferred VM binds * ---------------- * * By default, on a faulting VM binds just allocate the VMA and the actual - * updating of the page tables is defered to the page fault handler. This + * updating of the page tables is deferred to the page fault handler. This * behavior can be overridden by setting the flag DRM_XE_VM_BIND_FLAG_IMMEDIATE in * the VM bind which will then do the bind immediately. * @@ -500,18 +500,18 @@ * Slot waiting * ------------ * - * 1. The exection of all jobs from kernel ops shall wait on all slots + * 1. The execution of all jobs from kernel ops shall wait on all slots * (DMA_RESV_USAGE_PREEMPT_FENCE) of either an external BO or VM (depends on if * kernel op is operating on external or private BO) * - * 2. In non-compute mode, the exection of all jobs from rebinds in execs shall + * 2. In non-compute mode, the execution of all jobs from rebinds in execs shall * wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO or VM * (depends on if the rebind is operatiing on an external or private BO) * - * 3. In non-compute mode, the exection of all jobs from execs shall wait on the + * 3. In non-compute mode, the execution of all jobs from execs shall wait on the * last rebind job * - * 4. In compute mode, the exection of all jobs from rebinds in the rebind + * 4. 
In compute mode, the execution of all jobs from rebinds in the rebind * worker shall wait on the DMA_RESV_USAGE_KERNEL slot of either an external BO * or VM (depends on if rebind is operating on external or private BO) * diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c new file mode 100644 index 000000000000..b378848d3b7b --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vsec.c @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright © 2024 Intel Corporation */ +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/cleanup.h> +#include <linux/errno.h> +#include <linux/intel_vsec.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/types.h> + +#include "xe_device.h" +#include "xe_device_types.h" +#include "xe_drv.h" +#include "xe_mmio.h" +#include "xe_platform_types.h" +#include "xe_pm.h" +#include "xe_vsec.h" + +#include "regs/xe_pmt.h" + +/* PMT GUID value for BMG devices. NOTE: this is NOT a PCI id */ +#define BMG_DEVICE_ID 0xE2F8 + +static struct intel_vsec_header bmg_telemetry = { + .length = 0x10, + .id = VSEC_ID_TELEMETRY, + .num_entries = 2, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET, +}; + +static struct intel_vsec_header bmg_punit_crashlog = { + .length = 0x10, + .id = VSEC_ID_CRASHLOG, + .num_entries = 1, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET + 0x60, +}; + +static struct intel_vsec_header bmg_oobmsm_crashlog = { + .length = 0x10, + .id = VSEC_ID_CRASHLOG, + .num_entries = 1, + .entry_size = 4, + .tbir = 0, + .offset = BMG_DISCOVERY_OFFSET + 0x78, +}; + +static struct intel_vsec_header *bmg_capabilities[] = { + &bmg_telemetry, + &bmg_punit_crashlog, + &bmg_oobmsm_crashlog, + NULL +}; + +enum xe_vsec { + XE_VSEC_UNKNOWN = 0, + XE_VSEC_BMG, +}; + +static struct intel_vsec_platform_info xe_vsec_info[] = { + [XE_VSEC_BMG] = { + .caps = VSEC_CAP_TELEMETRY | VSEC_CAP_CRASHLOG, + .headers = bmg_capabilities, + }, + { } +}; + +/* + * The GUID will have the following bits to decode: + * [0:3] - {Telemetry space iteration number (0,1,..)} + * [4:7] - Segment (SEGMENT_INDEPENDENT-0, Client-1, Server-2) + * [8:11] - SOC_SKU + * [12:27] – Device ID – changes for each down bin SKU’s + * [28:29] - Capability Type (Crashlog-0, Telemetry Aggregator-1, Watcher-2) + * [30:31] - Record-ID (0-PUNIT, 1-OOBMSM_0, 2-OOBMSM_1) + */ +#define GUID_TELEM_ITERATION GENMASK(3, 0) +#define GUID_SEGMENT GENMASK(7, 4) +#define GUID_SOC_SKU GENMASK(11, 8) +#define GUID_DEVICE_ID GENMASK(27, 12) +#define GUID_CAP_TYPE GENMASK(29, 28) +#define GUID_RECORD_ID GENMASK(31, 30) + +#define PUNIT_TELEMETRY_OFFSET 0x0200 +#define PUNIT_WATCHER_OFFSET 0x14A0 +#define OOBMSM_0_WATCHER_OFFSET 0x18D8 +#define OOBMSM_1_TELEMETRY_OFFSET 0x1000 + +enum record_id { + PUNIT, + OOBMSM_0, + OOBMSM_1, +}; + +enum capability { + CRASHLOG, + TELEMETRY, + WATCHER, +}; + +static int xe_guid_decode(u32 guid, int *index, u32 *offset) +{ + u32 record_id = FIELD_GET(GUID_RECORD_ID, guid); + u32 cap_type = FIELD_GET(GUID_CAP_TYPE, guid); + u32 device_id = FIELD_GET(GUID_DEVICE_ID, guid); + + if (device_id != BMG_DEVICE_ID) + return -ENODEV; + + if (cap_type > WATCHER) + return -EINVAL; + + *offset = 0; + + if (cap_type == CRASHLOG) { + *index = record_id == PUNIT ? 
2 : 4; + return 0; + } + + switch (record_id) { + case PUNIT: + *index = 0; + if (cap_type == TELEMETRY) + *offset = PUNIT_TELEMETRY_OFFSET; + else + *offset = PUNIT_WATCHER_OFFSET; + break; + + case OOBMSM_0: + *index = 1; + if (cap_type == WATCHER) + *offset = OOBMSM_0_WATCHER_OFFSET; + break; + + case OOBMSM_1: + *index = 1; + if (cap_type == TELEMETRY) + *offset = OOBMSM_1_TELEMETRY_OFFSET; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, + u32 count) +{ + struct xe_device *xe = pdev_to_xe_device(pdev); + void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET; + u32 mem_region; + u32 offset; + int ret; + + ret = xe_guid_decode(guid, &mem_region, &offset); + if (ret) + return ret; + + telem_addr += offset + user_offset; + + guard(mutex)(&xe->pmt.lock); + + /* indicate that we are not at an appropriate power level */ + if (!xe_pm_runtime_get_if_active(xe)) + return -ENODATA; + + /* set SoC re-mapper index register based on GUID memory region */ + xe_mmio_rmw32(xe_root_tile_mmio(xe), SG_REMAP_INDEX1, SG_REMAP_BITS, + REG_FIELD_PREP(SG_REMAP_BITS, mem_region)); + + memcpy_fromio(data, telem_addr, count); + xe_pm_runtime_put(xe); + + return count; +} + +static struct pmt_callbacks xe_pmt_cb = { + .read_telem = xe_pmt_telem_read, +}; + +static const int vsec_platforms[] = { + [XE_BATTLEMAGE] = XE_VSEC_BMG, +}; + +static enum xe_vsec get_platform_info(struct xe_device *xe) +{ + if (xe->info.platform > XE_BATTLEMAGE) + return XE_VSEC_UNKNOWN; + + return vsec_platforms[xe->info.platform]; +} + +/** + * xe_vsec_init - Initialize resources and add intel_vsec auxiliary + * interface + * @xe: valid xe instance + */ +void xe_vsec_init(struct xe_device *xe) +{ + struct intel_vsec_platform_info *info; + struct device *dev = xe->drm.dev; + struct pci_dev *pdev = to_pci_dev(dev); + enum xe_vsec platform; + + platform = get_platform_info(xe); + if (platform == XE_VSEC_UNKNOWN) + return; + + info = &xe_vsec_info[platform]; + if (!info->headers) + return; + + switch (platform) { + case XE_VSEC_BMG: + info->priv_data = &xe_pmt_cb; + break; + default: + break; + } + + /* + * Register a VSEC. Cleanup is handled using device managed + * resources. 
+ */ + intel_vsec_register(pdev, info); +} +MODULE_IMPORT_NS("INTEL_VSEC"); diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h new file mode 100644 index 000000000000..5777c53faec2 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_vsec.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright © 2024 Intel Corporation */ + +#ifndef _XE_VSEC_H_ +#define _XE_VSEC_H_ + +struct xe_device; + +void xe_vsec_init(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index 02cf647f86d8..570fe0376402 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -607,6 +607,12 @@ static const struct xe_rtp_entry_sr engine_was[] = { FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(ROW_CHICKEN4, DISABLE_TDL_PUSH)) }, + { XE_RTP_NAME("16024792527"), + XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0), + FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(FIELD_SET(SAMPLER_MODE, SMP_WAIT_FETCH_MERGING_COUNTER, + SMP_FORCE_128B_OVERFETCH)) + }, {} }; diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index bcd04464b85e..3ed12a85cc60 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -1,3 +1,4 @@ +1607983814 GRAPHICS_VERSION_RANGE(1200, 1210) 22012773006 GRAPHICS_VERSION_RANGE(1200, 1250) 14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0) PLATFORM(DG2) diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c index aab79c5e34c2..1bda7ef606cc 100644 --- a/drivers/gpu/drm/xen/xen_drm_front.c +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -478,7 +478,6 @@ static const struct drm_driver xen_drm_driver = { .fops = &xen_drm_dev_fops, .name = "xendrm-du", .desc = "Xen PV DRM Display Unit", - .date = "20180221", .major = 1, .minor = 0, @@ -525,11 +524,6 @@ static int xen_drm_drv_init(struct xen_drm_front_info *front_info) if (ret) goto fail_register; - DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", - xen_drm_driver.name, xen_drm_driver.major, - xen_drm_driver.minor, xen_drm_driver.patchlevel, - xen_drm_driver.date, drm_dev->primary->index); - return 0; fail_register: diff --git a/drivers/gpu/drm/xlnx/Kconfig b/drivers/gpu/drm/xlnx/Kconfig index 4197f44e202f..dbecca9bdd54 100644 --- a/drivers/gpu/drm/xlnx/Kconfig +++ b/drivers/gpu/drm/xlnx/Kconfig @@ -17,3 +17,12 @@ config DRM_ZYNQMP_DPSUB This is a DRM/KMS driver for ZynqMP DisplayPort controller. Choose this option if you have a Xilinx ZynqMP SoC with DisplayPort subsystem. + +config DRM_ZYNQMP_DPSUB_AUDIO + bool "ZynqMP DisplayPort Audio Support" + depends on DRM_ZYNQMP_DPSUB + depends on SND && SND_SOC + select SND_SOC_GENERIC_DMAENGINE_PCM + help + Choose this option to enable DisplayPort audio support in the ZynqMP + DisplayPort driver. 
diff --git a/drivers/gpu/drm/xlnx/Makefile b/drivers/gpu/drm/xlnx/Makefile index ea1422a39502..ab6e2ffd7e8d 100644 --- a/drivers/gpu/drm/xlnx/Makefile +++ b/drivers/gpu/drm/xlnx/Makefile @@ -1,2 +1,3 @@ zynqmp-dpsub-y := zynqmp_disp.o zynqmp_dpsub.o zynqmp_dp.o zynqmp_kms.o +zynqmp-dpsub-$(CONFIG_DRM_ZYNQMP_DPSUB_AUDIO) += zynqmp_dp_audio.o obj-$(CONFIG_DRM_ZYNQMP_DPSUB) += zynqmp-dpsub.o diff --git a/drivers/gpu/drm/xlnx/zynqmp_disp.c b/drivers/gpu/drm/xlnx/zynqmp_disp.c index e4e0e299e8a7..80d1e499a18d 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_disp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_disp.c @@ -143,7 +143,6 @@ struct zynqmp_disp_layer { * @dpsub: Display subsystem * @blend: Register I/O base address for the blender * @avbuf: Register I/O base address for the audio/video buffer manager - * @audio: Registers I/O base address for the audio mixer * @layers: Layers (planes) */ struct zynqmp_disp { @@ -152,7 +151,6 @@ struct zynqmp_disp { void __iomem *blend; void __iomem *avbuf; - void __iomem *audio; struct zynqmp_disp_layer layers[ZYNQMP_DPSUB_NUM_LAYERS]; }; @@ -866,42 +864,6 @@ static void zynqmp_disp_blend_layer_disable(struct zynqmp_disp *disp, } /* ----------------------------------------------------------------------------- - * Audio Mixer - */ - -static void zynqmp_disp_audio_write(struct zynqmp_disp *disp, int reg, u32 val) -{ - writel(val, disp->audio + reg); -} - -/** - * zynqmp_disp_audio_enable - Enable the audio mixer - * @disp: Display controller - * - * Enable the audio mixer by de-asserting the soft reset. The audio state is set to - * default values by the reset, set the default mixer volume explicitly. - */ -static void zynqmp_disp_audio_enable(struct zynqmp_disp *disp) -{ - /* Clear the audio soft reset register as it's an non-reset flop. */ - zynqmp_disp_audio_write(disp, ZYNQMP_DISP_AUD_SOFT_RESET, 0); - zynqmp_disp_audio_write(disp, ZYNQMP_DISP_AUD_MIXER_VOLUME, - ZYNQMP_DISP_AUD_MIXER_VOLUME_NO_SCALE); -} - -/** - * zynqmp_disp_audio_disable - Disable the audio mixer - * @disp: Display controller - * - * Disable the audio mixer by asserting its soft reset. 
- */ -static void zynqmp_disp_audio_disable(struct zynqmp_disp *disp) -{ - zynqmp_disp_audio_write(disp, ZYNQMP_DISP_AUD_SOFT_RESET, - ZYNQMP_DISP_AUD_SOFT_RESET_AUD_SRST); -} - -/* ----------------------------------------------------------------------------- * ZynqMP Display Layer & DRM Plane */ @@ -1341,8 +1303,6 @@ void zynqmp_disp_enable(struct zynqmp_disp *disp) disp->dpsub->vid_clk_from_ps); zynqmp_disp_avbuf_enable_channels(disp); zynqmp_disp_avbuf_enable_audio(disp); - - zynqmp_disp_audio_enable(disp); } /** @@ -1351,8 +1311,6 @@ void zynqmp_disp_enable(struct zynqmp_disp *disp) */ void zynqmp_disp_disable(struct zynqmp_disp *disp) { - zynqmp_disp_audio_disable(disp); - zynqmp_disp_avbuf_disable_audio(disp); zynqmp_disp_avbuf_disable_channels(disp); zynqmp_disp_avbuf_disable(disp); @@ -1421,12 +1379,6 @@ int zynqmp_disp_probe(struct zynqmp_dpsub *dpsub) goto error; } - disp->audio = devm_platform_ioremap_resource_byname(pdev, "aud"); - if (IS_ERR(disp->audio)) { - ret = PTR_ERR(disp->audio); - goto error; - } - ret = zynqmp_disp_create_layers(disp); if (ret) goto error; diff --git a/drivers/gpu/drm/xlnx/zynqmp_disp_regs.h b/drivers/gpu/drm/xlnx/zynqmp_disp_regs.h index fa3935384834..9a4ff094e276 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_disp_regs.h +++ b/drivers/gpu/drm/xlnx/zynqmp_disp_regs.h @@ -177,12 +177,7 @@ #define ZYNQMP_DISP_AUD_MIXER_VOLUME 0x0 #define ZYNQMP_DISP_AUD_MIXER_VOLUME_NO_SCALE 0x20002000 #define ZYNQMP_DISP_AUD_MIXER_META_DATA 0x4 -#define ZYNQMP_DISP_AUD_CH_STATUS0 0x8 -#define ZYNQMP_DISP_AUD_CH_STATUS1 0xc -#define ZYNQMP_DISP_AUD_CH_STATUS2 0x10 -#define ZYNQMP_DISP_AUD_CH_STATUS3 0x14 -#define ZYNQMP_DISP_AUD_CH_STATUS4 0x18 -#define ZYNQMP_DISP_AUD_CH_STATUS5 0x1c +#define ZYNQMP_DISP_AUD_CH_STATUS(x) (0x8 + ((x) * 4)) #define ZYNQMP_DISP_AUD_CH_A_DATA0 0x20 #define ZYNQMP_DISP_AUD_CH_A_DATA1 0x24 #define ZYNQMP_DISP_AUD_CH_A_DATA2 0x28 diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c index 25c5dc61ee88..0b63fd48ea92 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c @@ -1342,7 +1342,6 @@ static void zynqmp_dp_encoder_mode_set_stream(struct zynqmp_dp *dp, { u8 lane_cnt = dp->mode.lane_cnt; u32 reg, wpl; - unsigned int rate; zynqmp_dp_write(dp, ZYNQMP_DP_MAIN_STREAM_HTOTAL, mode->htotal); zynqmp_dp_write(dp, ZYNQMP_DP_MAIN_STREAM_VTOTAL, mode->vtotal); @@ -1367,18 +1366,8 @@ static void zynqmp_dp_encoder_mode_set_stream(struct zynqmp_dp *dp, reg = drm_dp_bw_code_to_link_rate(dp->mode.bw_code); zynqmp_dp_write(dp, ZYNQMP_DP_MAIN_STREAM_N_VID, reg); zynqmp_dp_write(dp, ZYNQMP_DP_MAIN_STREAM_M_VID, mode->clock); - rate = zynqmp_dpsub_get_audio_clk_rate(dp->dpsub); - if (rate) { - dev_dbg(dp->dev, "Audio rate: %d\n", rate / 512); - zynqmp_dp_write(dp, ZYNQMP_DP_TX_N_AUD, reg); - zynqmp_dp_write(dp, ZYNQMP_DP_TX_M_AUD, rate / 1000); - } } - /* Only 2 channel audio is supported now */ - if (zynqmp_dpsub_audio_enabled(dp->dpsub)) - zynqmp_dp_write(dp, ZYNQMP_DP_TX_AUDIO_CHANNELS, 1); - zynqmp_dp_write(dp, ZYNQMP_DP_USER_PIX_WIDTH, 1); /* Translate to the native 16 bit datapath based on IP core spec */ @@ -1388,6 +1377,44 @@ static void zynqmp_dp_encoder_mode_set_stream(struct zynqmp_dp *dp, } /* ----------------------------------------------------------------------------- + * Audio + */ + +void zynqmp_dp_audio_set_channels(struct zynqmp_dp *dp, + unsigned int num_channels) +{ + zynqmp_dp_write(dp, ZYNQMP_DP_TX_AUDIO_CHANNELS, num_channels - 1); +} + +void zynqmp_dp_audio_enable(struct zynqmp_dp 
*dp) +{ + zynqmp_dp_write(dp, ZYNQMP_DP_TX_AUDIO_CONTROL, 1); +} + +void zynqmp_dp_audio_disable(struct zynqmp_dp *dp) +{ + zynqmp_dp_write(dp, ZYNQMP_DP_TX_AUDIO_CONTROL, 0); +} + +void zynqmp_dp_audio_write_n_m(struct zynqmp_dp *dp) +{ + unsigned int rate; + u32 link_rate; + + if (!(dp->config.misc0 & ZYNQMP_DP_MAIN_STREAM_MISC0_SYNC_LOCK)) + return; + + link_rate = drm_dp_bw_code_to_link_rate(dp->mode.bw_code); + + rate = clk_get_rate(dp->dpsub->aud_clk); + + dev_dbg(dp->dev, "Audio rate: %d\n", rate / 512); + + zynqmp_dp_write(dp, ZYNQMP_DP_TX_N_AUD, link_rate); + zynqmp_dp_write(dp, ZYNQMP_DP_TX_M_AUD, rate / 1000); +} + +/* ----------------------------------------------------------------------------- * DISP Configuration */ @@ -1577,8 +1604,7 @@ static void zynqmp_dp_bridge_atomic_enable(struct drm_bridge *bridge, /* Enable the encoder */ dp->enabled = true; zynqmp_dp_update_misc(dp); - if (zynqmp_dpsub_audio_enabled(dp->dpsub)) - zynqmp_dp_write(dp, ZYNQMP_DP_TX_AUDIO_CONTROL, 1); + zynqmp_dp_write(dp, ZYNQMP_DP_TX_PHY_POWER_DOWN, 0); if (dp->status == connector_status_connected) { for (i = 0; i < 3; i++) { @@ -1613,8 +1639,6 @@ static void zynqmp_dp_bridge_atomic_disable(struct drm_bridge *bridge, drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, DP_SET_POWER_D3); zynqmp_dp_write(dp, ZYNQMP_DP_TX_PHY_POWER_DOWN, ZYNQMP_DP_TX_PHY_POWER_DOWN_ALL); - if (zynqmp_dpsub_audio_enabled(dp->dpsub)) - zynqmp_dp_write(dp, ZYNQMP_DP_TX_AUDIO_CONTROL, 0); zynqmp_dp_disp_disable(dp, old_bridge_state); mutex_unlock(&dp->lock); @@ -2190,7 +2214,7 @@ static int zynqmp_dp_rate_get(void *data, u64 *val) struct zynqmp_dp *dp = data; mutex_lock(&dp->lock); - *val = drm_dp_bw_code_to_link_rate(dp->test.bw_code) * 10000; + *val = drm_dp_bw_code_to_link_rate(dp->test.bw_code) * 10000ULL; mutex_unlock(&dp->lock); return 0; } diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.h b/drivers/gpu/drm/xlnx/zynqmp_dp.h index f077d7fbd0ad..a3257793e23a 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dp.h +++ b/drivers/gpu/drm/xlnx/zynqmp_dp.h @@ -22,4 +22,11 @@ void zynqmp_dp_disable_vblank(struct zynqmp_dp *dp); int zynqmp_dp_probe(struct zynqmp_dpsub *dpsub); void zynqmp_dp_remove(struct zynqmp_dpsub *dpsub); +void zynqmp_dp_audio_set_channels(struct zynqmp_dp *dp, + unsigned int num_channels); +void zynqmp_dp_audio_enable(struct zynqmp_dp *dp); +void zynqmp_dp_audio_disable(struct zynqmp_dp *dp); + +void zynqmp_dp_audio_write_n_m(struct zynqmp_dp *dp); + #endif /* _ZYNQMP_DP_H_ */ diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp_audio.c b/drivers/gpu/drm/xlnx/zynqmp_dp_audio.c new file mode 100644 index 000000000000..fa5f0ace6084 --- /dev/null +++ b/drivers/gpu/drm/xlnx/zynqmp_dp_audio.c @@ -0,0 +1,447 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ZynqMP DisplayPort Subsystem Driver - Audio support + * + * Copyright (C) 2015 - 2024 Xilinx, Inc. 
+ * + * Authors: + * - Hyun Woo Kwon <hyun.kwon@xilinx.com> + * - Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/mutex.h> +#include <linux/pm_runtime.h> + +#include <sound/asoundef.h> +#include <sound/core.h> +#include <sound/dmaengine_pcm.h> +#include <sound/initval.h> +#include <sound/pcm.h> +#include <sound/soc.h> +#include <sound/tlv.h> + +#include "zynqmp_disp_regs.h" +#include "zynqmp_dp.h" +#include "zynqmp_dpsub.h" + +#define ZYNQMP_DISP_AUD_SMPL_RATE_TO_CLK 512 +#define ZYNQMP_NUM_PCMS 2 + +struct zynqmp_dpsub_audio { + void __iomem *base; + + struct snd_soc_card card; + + const char *dai_name; + const char *link_names[ZYNQMP_NUM_PCMS]; + const char *pcm_names[ZYNQMP_NUM_PCMS]; + + struct snd_soc_dai_driver dai_driver; + struct snd_dmaengine_pcm_config pcm_configs[2]; + + struct snd_soc_dai_link links[ZYNQMP_NUM_PCMS]; + + struct { + struct snd_soc_dai_link_component cpu; + struct snd_soc_dai_link_component codec; + struct snd_soc_dai_link_component platform; + } components[ZYNQMP_NUM_PCMS]; + + /* + * Protects: + * - enabled_streams + * - volumes + * - current_rate + */ + struct mutex enable_lock; + + u32 enabled_streams; + u32 current_rate; + + u16 volumes[2]; +}; + +static const struct snd_pcm_hardware zynqmp_dp_pcm_hw = { + .info = SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_PAUSE | + SNDRV_PCM_INFO_RESUME | + SNDRV_PCM_INFO_NO_PERIOD_WAKEUP, + + .buffer_bytes_max = 128 * 1024, + .period_bytes_min = 256, + .period_bytes_max = 1024 * 1024, + .periods_min = 2, + .periods_max = 256, +}; + +static int zynqmp_dp_startup(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + + snd_pcm_hw_constraint_step(runtime, 0, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, + 256); + + return 0; +} + +static const struct snd_soc_ops zynqmp_dp_ops = { + .startup = zynqmp_dp_startup, +}; + +static void zynqmp_dp_audio_write(struct zynqmp_dpsub_audio *audio, int reg, + u32 val) +{ + writel(val, audio->base + reg); +} + +static int dp_dai_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params, + struct snd_soc_dai *socdai) +{ + struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct zynqmp_dpsub *dpsub = + snd_soc_dai_get_drvdata(snd_soc_rtd_to_cpu(rtd, 0)); + struct zynqmp_dpsub_audio *audio = dpsub->audio; + int ret; + u32 sample_rate; + struct snd_aes_iec958 iec = { 0 }; + unsigned long rate; + + sample_rate = params_rate(params); + + if (sample_rate != 48000 && sample_rate != 44100) + return -EINVAL; + + guard(mutex)(&audio->enable_lock); + + if (audio->enabled_streams && audio->current_rate != sample_rate) { + dev_err(dpsub->dev, + "Can't change rate while playback enabled\n"); + return -EINVAL; + } + + if (audio->enabled_streams > 0) { + /* Nothing to do */ + audio->enabled_streams++; + return 0; + } + + audio->current_rate = sample_rate; + + /* Note: clock rate can only be changed if the clock is disabled */ + ret = clk_set_rate(dpsub->aud_clk, + sample_rate * ZYNQMP_DISP_AUD_SMPL_RATE_TO_CLK); + if (ret) { + dev_err(dpsub->dev, "can't set aud_clk to %u err:%d\n", + sample_rate * ZYNQMP_DISP_AUD_SMPL_RATE_TO_CLK, ret); + return ret; + } + + clk_prepare_enable(dpsub->aud_clk); + + rate = clk_get_rate(dpsub->aud_clk); + + /* Ignore some offset +- 10 */ + if (abs(sample_rate * ZYNQMP_DISP_AUD_SMPL_RATE_TO_CLK - rate) > 10) { + dev_err(dpsub->dev, "aud_clk offset is higher: %ld\n", + 
sample_rate * ZYNQMP_DISP_AUD_SMPL_RATE_TO_CLK - rate); + clk_disable_unprepare(dpsub->aud_clk); + return -EINVAL; + } + + pm_runtime_get_sync(dpsub->dev); + + zynqmp_dp_audio_write(audio, ZYNQMP_DISP_AUD_MIXER_VOLUME, + audio->volumes[0] | (audio->volumes[1] << 16)); + + /* Clear the audio soft reset register as it's an non-reset flop. */ + zynqmp_dp_audio_write(audio, ZYNQMP_DISP_AUD_SOFT_RESET, 0); + + /* Only 2 channel audio is supported now */ + zynqmp_dp_audio_set_channels(dpsub->dp, 2); + + zynqmp_dp_audio_write_n_m(dpsub->dp); + + /* Channel status */ + + if (sample_rate == 48000) + iec.status[3] = IEC958_AES3_CON_FS_48000; + else + iec.status[3] = IEC958_AES3_CON_FS_44100; + + for (unsigned int i = 0; i < AES_IEC958_STATUS_SIZE / 4; ++i) { + u32 v; + + v = (iec.status[(i * 4) + 0] << 0) | + (iec.status[(i * 4) + 1] << 8) | + (iec.status[(i * 4) + 2] << 16) | + (iec.status[(i * 4) + 3] << 24); + + zynqmp_dp_audio_write(audio, ZYNQMP_DISP_AUD_CH_STATUS(i), v); + } + + zynqmp_dp_audio_enable(dpsub->dp); + + audio->enabled_streams++; + + return 0; +} + +static int dp_dai_hw_free(struct snd_pcm_substream *substream, + struct snd_soc_dai *socdai) +{ + struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); + struct zynqmp_dpsub *dpsub = + snd_soc_dai_get_drvdata(snd_soc_rtd_to_cpu(rtd, 0)); + struct zynqmp_dpsub_audio *audio = dpsub->audio; + + guard(mutex)(&audio->enable_lock); + + /* Nothing to do */ + if (audio->enabled_streams > 1) { + audio->enabled_streams--; + return 0; + } + + pm_runtime_put(dpsub->dev); + + zynqmp_dp_audio_disable(dpsub->dp); + + /* + * Reset doesn't work. If we assert reset between audio stop and start, + * the audio won't start anymore. Probably we are missing writing + * some audio related registers. A/B buf? + */ + /* + zynqmp_disp_audio_write(audio, ZYNQMP_DISP_AUD_SOFT_RESET, + ZYNQMP_DISP_AUD_SOFT_RESET_AUD_SRST); + */ + + clk_disable_unprepare(dpsub->aud_clk); + + audio->current_rate = 0; + audio->enabled_streams--; + + return 0; +} + +static const struct snd_soc_dai_ops zynqmp_dp_dai_ops = { + .hw_params = dp_dai_hw_params, + .hw_free = dp_dai_hw_free, +}; + +/* + * Min = 10 * log10(0x1 / 0x2000) = -39.13 + * Max = 10 * log10(0xffffff / 0x2000) = 9.03 + */ +static const DECLARE_TLV_DB_RANGE(zynqmp_dp_tlv, + 0x0, 0x0, TLV_DB_SCALE_ITEM(TLV_DB_GAIN_MUTE, -3913, 1), + 0x1, 0x2000, TLV_DB_LINEAR_ITEM(-3913, 0), + 0x2000, 0xffff, TLV_DB_LINEAR_ITEM(0, 903), +); + +static const struct snd_kcontrol_new zynqmp_dp_snd_controls[] = { + SOC_SINGLE_TLV("Input0 Playback Volume", 0, + 0, 0xffff, 0, zynqmp_dp_tlv), + SOC_SINGLE_TLV("Input1 Playback Volume", 1, + 0, 0xffff, 0, zynqmp_dp_tlv), +}; + +/* + * Note: these read & write functions only support two "registers", 0 and 1, + * for volume 0 and 1. In other words, these are not real register read/write + * functions. + * + * This is done to support caching the volume value for the case where the + * hardware is not enabled, and also to support locking as volumes 0 and 1 + * are in the same register. 
+ */ +static unsigned int zynqmp_dp_dai_read(struct snd_soc_component *component, + unsigned int reg) +{ + struct zynqmp_dpsub *dpsub = dev_get_drvdata(component->dev); + struct zynqmp_dpsub_audio *audio = dpsub->audio; + + return audio->volumes[reg]; +} + +static int zynqmp_dp_dai_write(struct snd_soc_component *component, + unsigned int reg, unsigned int val) +{ + struct zynqmp_dpsub *dpsub = dev_get_drvdata(component->dev); + struct zynqmp_dpsub_audio *audio = dpsub->audio; + + guard(mutex)(&audio->enable_lock); + + audio->volumes[reg] = val; + + if (audio->enabled_streams) + zynqmp_dp_audio_write(audio, ZYNQMP_DISP_AUD_MIXER_VOLUME, + audio->volumes[0] | + (audio->volumes[1] << 16)); + + return 0; +} + +static const struct snd_soc_component_driver zynqmp_dp_component_driver = { + .idle_bias_on = 1, + .use_pmdown_time = 1, + .endianness = 1, + .controls = zynqmp_dp_snd_controls, + .num_controls = ARRAY_SIZE(zynqmp_dp_snd_controls), + .read = zynqmp_dp_dai_read, + .write = zynqmp_dp_dai_write, +}; + +int zynqmp_audio_init(struct zynqmp_dpsub *dpsub) +{ + struct platform_device *pdev = to_platform_device(dpsub->dev); + struct device *dev = dpsub->dev; + struct zynqmp_dpsub_audio *audio; + struct snd_soc_card *card; + void *dev_data; + int ret; + + if (!dpsub->aud_clk) + return 0; + + audio = devm_kzalloc(dev, sizeof(*audio), GFP_KERNEL); + if (!audio) + return -ENOMEM; + + dpsub->audio = audio; + + mutex_init(&audio->enable_lock); + + /* 0x2000 is the zero level, no change */ + audio->volumes[0] = 0x2000; + audio->volumes[1] = 0x2000; + + audio->dai_name = devm_kasprintf(dev, GFP_KERNEL, + "%s-dai", dev_name(dev)); + + for (unsigned int i = 0; i < ZYNQMP_NUM_PCMS; ++i) { + audio->link_names[i] = devm_kasprintf(dev, GFP_KERNEL, + "%s-dp-%u", dev_name(dev), i); + audio->pcm_names[i] = devm_kasprintf(dev, GFP_KERNEL, + "%s-pcm-%u", dev_name(dev), i); + } + + audio->base = devm_platform_ioremap_resource_byname(pdev, "aud"); + if (IS_ERR(audio->base)) + return PTR_ERR(audio->base); + + /* Create CPU DAI */ + + audio->dai_driver = (struct snd_soc_dai_driver) { + .name = audio->dai_name, + .ops = &zynqmp_dp_dai_ops, + .playback = { + .channels_min = 2, + .channels_max = 2, + .rates = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000, + .formats = SNDRV_PCM_FMTBIT_S16_LE, + }, + }; + + ret = devm_snd_soc_register_component(dev, &zynqmp_dp_component_driver, + &audio->dai_driver, 1); + if (ret) { + dev_err(dev, "Failed to register CPU DAI\n"); + return ret; + } + + /* Create PCMs */ + + for (unsigned int i = 0; i < ZYNQMP_NUM_PCMS; ++i) { + struct snd_dmaengine_pcm_config *pcm_config = + &audio->pcm_configs[i]; + + *pcm_config = (struct snd_dmaengine_pcm_config){ + .name = audio->pcm_names[i], + .pcm_hardware = &zynqmp_dp_pcm_hw, + .prealloc_buffer_size = 64 * 1024, + .chan_names[SNDRV_PCM_STREAM_PLAYBACK] = + i == 0 ? 
"aud0" : "aud1", + }; + + ret = devm_snd_dmaengine_pcm_register(dev, pcm_config, 0); + if (ret) { + dev_err(dev, "Failed to register PCM %u\n", i); + return ret; + } + } + + /* Create card */ + + card = &audio->card; + card->name = "DisplayPort"; + card->long_name = "DisplayPort Monitor"; + card->driver_name = "zynqmp_dpsub"; + card->dev = dev; + card->owner = THIS_MODULE; + card->num_links = ZYNQMP_NUM_PCMS; + card->dai_link = audio->links; + + for (unsigned int i = 0; i < ZYNQMP_NUM_PCMS; ++i) { + struct snd_soc_dai_link *link = &card->dai_link[i]; + + link->ops = &zynqmp_dp_ops; + + link->name = audio->link_names[i]; + link->stream_name = audio->link_names[i]; + + link->cpus = &audio->components[i].cpu; + link->num_cpus = 1; + link->cpus[0].dai_name = audio->dai_name; + + link->codecs = &audio->components[i].codec; + link->num_codecs = 1; + link->codecs[0].name = "snd-soc-dummy"; + link->codecs[0].dai_name = "snd-soc-dummy-dai"; + + link->platforms = &audio->components[i].platform; + link->num_platforms = 1; + link->platforms[0].name = audio->pcm_names[i]; + } + + /* + * HACK: devm_snd_soc_register_card() overwrites current drvdata + * so we need to hack it back. + */ + dev_data = dev_get_drvdata(dev); + ret = devm_snd_soc_register_card(dev, card); + dev_set_drvdata(dev, dev_data); + if (ret) { + /* + * As older dtbs may not have the audio channel dmas defined, + * instead of returning an error here we'll continue and just + * mark the audio as disabled. + */ + dev_err(dev, "Failed to register sound card, disabling audio support\n"); + + devm_kfree(dev, audio); + dpsub->audio = NULL; + + return 0; + } + + return 0; +} + +void zynqmp_audio_uninit(struct zynqmp_dpsub *dpsub) +{ + struct zynqmp_dpsub_audio *audio = dpsub->audio; + + if (!audio) + return; + + if (!dpsub->aud_clk) + return; + + mutex_destroy(&audio->enable_lock); +} diff --git a/drivers/gpu/drm/xlnx/zynqmp_dpsub.c b/drivers/gpu/drm/xlnx/zynqmp_dpsub.c index 07c4d184e7a1..f953ca48a930 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dpsub.c +++ b/drivers/gpu/drm/xlnx/zynqmp_dpsub.c @@ -57,36 +57,6 @@ static const struct dev_pm_ops zynqmp_dpsub_pm_ops = { }; /* ----------------------------------------------------------------------------- - * DPSUB Configuration - */ - -/** - * zynqmp_dpsub_audio_enabled - If the audio is enabled - * @dpsub: DisplayPort subsystem - * - * Return if the audio is enabled depending on the audio clock. - * - * Return: true if audio is enabled, or false. - */ -bool zynqmp_dpsub_audio_enabled(struct zynqmp_dpsub *dpsub) -{ - return !!dpsub->aud_clk; -} - -/** - * zynqmp_dpsub_get_audio_clk_rate - Get the current audio clock rate - * @dpsub: DisplayPort subsystem - * - * Return: the current audio clock rate. 
- */ -unsigned int zynqmp_dpsub_get_audio_clk_rate(struct zynqmp_dpsub *dpsub) -{ - if (zynqmp_dpsub_audio_enabled(dpsub)) - return 0; - return clk_get_rate(dpsub->aud_clk); -} - -/* ----------------------------------------------------------------------------- * Probe & Remove */ @@ -264,10 +234,17 @@ static int zynqmp_dpsub_probe(struct platform_device *pdev) goto err_disp; } + ret = zynqmp_audio_init(dpsub); + if (ret) + goto err_drm_cleanup; + dev_info(&pdev->dev, "ZynqMP DisplayPort Subsystem driver probed"); return 0; +err_drm_cleanup: + if (dpsub->drm) + zynqmp_dpsub_drm_cleanup(dpsub); err_disp: drm_bridge_remove(dpsub->bridge); zynqmp_disp_remove(dpsub); @@ -287,6 +264,8 @@ static void zynqmp_dpsub_remove(struct platform_device *pdev) { struct zynqmp_dpsub *dpsub = platform_get_drvdata(pdev); + zynqmp_audio_uninit(dpsub); + if (dpsub->drm) zynqmp_dpsub_drm_cleanup(dpsub); diff --git a/drivers/gpu/drm/xlnx/zynqmp_dpsub.h b/drivers/gpu/drm/xlnx/zynqmp_dpsub.h index b18554467e9c..49875529c2a4 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_dpsub.h +++ b/drivers/gpu/drm/xlnx/zynqmp_dpsub.h @@ -12,6 +12,8 @@ #ifndef _ZYNQMP_DPSUB_H_ #define _ZYNQMP_DPSUB_H_ +#include <linux/types.h> + struct clk; struct device; struct drm_bridge; @@ -39,6 +41,8 @@ enum zynqmp_dpsub_format { ZYNQMP_DPSUB_FORMAT_YONLY, }; +struct zynqmp_dpsub_audio; + /** * struct zynqmp_dpsub - ZynqMP DisplayPort Subsystem * @dev: The physical device @@ -77,10 +81,17 @@ struct zynqmp_dpsub { struct zynqmp_dp *dp; unsigned int dma_align; + + struct zynqmp_dpsub_audio *audio; }; -bool zynqmp_dpsub_audio_enabled(struct zynqmp_dpsub *dpsub); -unsigned int zynqmp_dpsub_get_audio_clk_rate(struct zynqmp_dpsub *dpsub); +#ifdef CONFIG_DRM_ZYNQMP_DPSUB_AUDIO +int zynqmp_audio_init(struct zynqmp_dpsub *dpsub); +void zynqmp_audio_uninit(struct zynqmp_dpsub *dpsub); +#else +static inline int zynqmp_audio_init(struct zynqmp_dpsub *dpsub) { return 0; } +static inline void zynqmp_audio_uninit(struct zynqmp_dpsub *dpsub) { } +#endif void zynqmp_dpsub_release(struct zynqmp_dpsub *dpsub); diff --git a/drivers/gpu/drm/xlnx/zynqmp_kms.c b/drivers/gpu/drm/xlnx/zynqmp_kms.c index fc81983d9e5e..b47463473472 100644 --- a/drivers/gpu/drm/xlnx/zynqmp_kms.c +++ b/drivers/gpu/drm/xlnx/zynqmp_kms.c @@ -9,12 +9,12 @@ * - Laurent Pinchart <laurent.pinchart@ideasonboard.com> */ +#include <drm/clients/drm_client_setup.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_blend.h> #include <drm/drm_bridge.h> #include <drm/drm_bridge_connector.h> -#include <drm/drm_client_setup.h> #include <drm/drm_connector.h> #include <drm/drm_crtc.h> #include <drm/drm_device.h> @@ -409,7 +409,6 @@ static const struct drm_driver zynqmp_dpsub_drm_driver = { .name = "zynqmp-dpsub", .desc = "Xilinx DisplayPort Subsystem Driver", - .date = "20130509", .major = 1, .minor = 0, }; |
