aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/pm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/pm')
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_dpm.c79
-rw-r--r--drivers/gpu/drm/amd/pm/amdgpu_pm.c153
-rw-r--r--drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h5
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c58
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h108
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h74
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h6
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c2
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c305
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c151
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h6
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c30
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c14
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h26
17 files changed, 949 insertions, 78 deletions
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 71d986dd7a6e..518d07afc7df 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -764,10 +764,6 @@ int amdgpu_dpm_send_rma_reason(struct amdgpu_device *adev)
ret = smu_send_rma_reason(smu);
mutex_unlock(&adev->pm.mutex);
- if (adev->cper.enabled)
- if (amdgpu_cper_generate_bp_threshold_record(adev))
- dev_warn(adev->dev, "fail to generate bad page threshold cper records\n");
-
return ret;
}
@@ -824,6 +820,21 @@ int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask)
return ret;
}
+bool amdgpu_dpm_reset_vcn_is_supported(struct amdgpu_device *adev)
+{
+ struct smu_context *smu = adev->powerplay.pp_handle;
+ bool ret;
+
+ if (!is_support_sw_smu(adev))
+ return false;
+
+ mutex_lock(&adev->pm.mutex);
+ ret = smu_reset_vcn_is_supported(smu);
+ mutex_unlock(&adev->pm.mutex);
+
+ return ret;
+}
+
int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
enum pp_clock_type type,
uint32_t *min,
@@ -2038,6 +2049,66 @@ int amdgpu_dpm_get_dpm_clock_table(struct amdgpu_device *adev,
}
/**
+ * amdgpu_dpm_get_temp_metrics - Retrieve metrics for a specific compute
+ * partition
+ * @adev: Pointer to the device.
+ * @type: Identifier for the temperature type metrics to be fetched.
+ * @table: Pointer to a buffer where the metrics will be stored. If NULL, the
+ * function returns the size of the metrics structure.
+ *
+ * This function retrieves metrics for a specific temperature type, If the
+ * table parameter is NULL, the function returns the size of the metrics
+ * structure without populating it.
+ *
+ * Return: Size of the metrics structure on success, or a negative error code on failure.
+ */
+ssize_t amdgpu_dpm_get_temp_metrics(struct amdgpu_device *adev,
+ enum smu_temp_metric_type type, void *table)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ int ret;
+
+ if (!pp_funcs->get_temp_metrics ||
+ !amdgpu_dpm_is_temp_metrics_supported(adev, type))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&adev->pm.mutex);
+ ret = pp_funcs->get_temp_metrics(adev->powerplay.pp_handle, type, table);
+ mutex_unlock(&adev->pm.mutex);
+
+ return ret;
+}
+
+/**
+ * amdgpu_dpm_is_temp_metrics_supported - Return if specific temperature metrics support
+ * is available
+ * @adev: Pointer to the device.
+ * @type: Identifier for the temperature type metrics to be fetched.
+ *
+ * This function returns metrics if specific temperature metrics type is supported or not.
+ *
+ * Return: True in case of metrics type supported else false.
+ */
+bool amdgpu_dpm_is_temp_metrics_supported(struct amdgpu_device *adev,
+ enum smu_temp_metric_type type)
+{
+ const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
+ bool support_temp_metrics = false;
+
+ if (!pp_funcs->temp_metrics_is_supported)
+ return support_temp_metrics;
+
+ if (is_support_sw_smu(adev)) {
+ mutex_lock(&adev->pm.mutex);
+ support_temp_metrics =
+ pp_funcs->temp_metrics_is_supported(adev->powerplay.pp_handle, type);
+ mutex_unlock(&adev->pm.mutex);
+ }
+
+ return support_temp_metrics;
+}
+
+/**
* amdgpu_dpm_get_xcp_metrics - Retrieve metrics for a specific compute
* partition
* @adev: Pointer to the device.
diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
index 4b64851fdb42..5230276628a3 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
@@ -2073,6 +2073,134 @@ static int pp_dpm_clk_default_attr_update(struct amdgpu_device *adev, struct amd
return 0;
}
+/**
+ * DOC: board
+ *
+ * Certain SOCs can support various board attributes reporting. This is useful
+ * for user application to monitor various board reated attributes.
+ *
+ * The amdgpu driver provides a sysfs API for reporting board attributes. Presently,
+ * only two types of attributes are reported, baseboard temperature and
+ * gpu board temperature. Both of them are reported as binary files.
+ *
+ * * .. code-block:: console
+ *
+ * hexdump /sys/bus/pci/devices/.../board/baseboard_temp
+ *
+ * hexdump /sys/bus/pci/devices/.../board/gpuboard_temp
+ *
+ */
+
+/**
+ * DOC: baseboard_temp
+ *
+ * The amdgpu driver provides a sysfs API for retrieving current baseboard
+ * temperature metrics data. The file baseboard_temp is used for this.
+ * Reading the file will dump all the current baseboard temperature metrics data.
+ */
+static ssize_t amdgpu_get_baseboard_temp_metrics(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ ssize_t size;
+ int ret;
+
+ ret = amdgpu_pm_get_access_if_active(adev);
+ if (ret)
+ return ret;
+
+ size = amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_BASEBOARD, NULL);
+ if (size <= 0)
+ goto out;
+ if (size >= PAGE_SIZE) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_BASEBOARD, buf);
+
+out:
+ amdgpu_pm_put_access(adev);
+
+ if (ret)
+ return ret;
+
+ return size;
+}
+
+/**
+ * DOC: gpuboard_temp
+ *
+ * The amdgpu driver provides a sysfs API for retrieving current gpuboard
+ * temperature metrics data. The file gpuboard_temp is used for this.
+ * Reading the file will dump all the current gpuboard temperature metrics data.
+ */
+static ssize_t amdgpu_get_gpuboard_temp_metrics(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+ ssize_t size;
+ int ret;
+
+ ret = amdgpu_pm_get_access_if_active(adev);
+ if (ret)
+ return ret;
+
+ size = amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_GPUBOARD, NULL);
+ if (size <= 0)
+ goto out;
+ if (size >= PAGE_SIZE) {
+ ret = -ENOSPC;
+ goto out;
+ }
+
+ amdgpu_dpm_get_temp_metrics(adev, SMU_TEMP_METRIC_GPUBOARD, buf);
+
+out:
+ amdgpu_pm_put_access(adev);
+
+ if (ret)
+ return ret;
+
+ return size;
+}
+
+static DEVICE_ATTR(baseboard_temp, 0444, amdgpu_get_baseboard_temp_metrics, NULL);
+static DEVICE_ATTR(gpuboard_temp, 0444, amdgpu_get_gpuboard_temp_metrics, NULL);
+
+static struct attribute *board_attrs[] = {
+ &dev_attr_baseboard_temp.attr,
+ &dev_attr_gpuboard_temp.attr,
+ NULL
+};
+
+static umode_t amdgpu_board_attr_visible(struct kobject *kobj, struct attribute *attr, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = drm_to_adev(ddev);
+
+ if (attr == &dev_attr_baseboard_temp.attr) {
+ if (!amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_BASEBOARD))
+ return 0;
+ }
+
+ if (attr == &dev_attr_gpuboard_temp.attr) {
+ if (!amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD))
+ return 0;
+ }
+
+ return attr->mode;
+}
+
+const struct attribute_group amdgpu_board_attr_group = {
+ .name = "board",
+ .attrs = board_attrs,
+ .is_visible = amdgpu_board_attr_visible,
+};
+
/* pm policy attributes */
struct amdgpu_pm_policy_attr {
struct device_attribute dev_attr;
@@ -3458,14 +3586,16 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
effective_mode &= ~S_IWUSR;
/* not implemented yet for APUs other than GC 10.3.1 (vangogh) and 9.4.3 */
- if (((adev->family == AMDGPU_FAMILY_SI) ||
- ((adev->flags & AMD_IS_APU) && (gc_ver != IP_VERSION(10, 3, 1)) &&
- (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4)))) &&
- (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
- attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr ||
- attr == &sensor_dev_attr_power1_cap.dev_attr.attr ||
- attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr))
- return 0;
+ if (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_power1_cap.dev_attr.attr ||
+ attr == &sensor_dev_attr_power1_cap_default.dev_attr.attr) {
+ if (adev->family == AMDGPU_FAMILY_SI ||
+ ((adev->flags & AMD_IS_APU) && gc_ver != IP_VERSION(10, 3, 1) &&
+ (gc_ver != IP_VERSION(9, 4, 3) && gc_ver != IP_VERSION(9, 4, 4))) ||
+ (amdgpu_sriov_vf(adev) && gc_ver == IP_VERSION(11, 0, 3)))
+ return 0;
+ }
/* not implemented yet for APUs having < GC 9.3.0 (Renoir) */
if (((adev->family == AMDGPU_FAMILY_SI) ||
@@ -4461,6 +4591,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
goto err_out0;
}
+ if (amdgpu_dpm_is_temp_metrics_supported(adev, SMU_TEMP_METRIC_GPUBOARD)) {
+ ret = devm_device_add_group(adev->dev,
+ &amdgpu_board_attr_group);
+ if (ret)
+ goto err_out0;
+ }
+
adev->pm.sysfs_initialized = true;
return 0;
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
index 768317ee1486..9748744133d9 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
@@ -526,6 +526,8 @@ int amdgpu_dpm_set_power_profile_mode(struct amdgpu_device *adev,
int amdgpu_dpm_get_gpu_metrics(struct amdgpu_device *adev, void **table);
ssize_t amdgpu_dpm_get_xcp_metrics(struct amdgpu_device *adev, int xcp_id,
void *table);
+ssize_t amdgpu_dpm_get_temp_metrics(struct amdgpu_device *adev,
+ enum smu_temp_metric_type type, void *table);
/**
* @get_pm_metrics: Get one snapshot of power management metrics from PMFW. The
@@ -613,5 +615,8 @@ ssize_t amdgpu_dpm_get_pm_policy_info(struct amdgpu_device *adev,
int amdgpu_dpm_reset_sdma(struct amdgpu_device *adev, uint32_t inst_mask);
bool amdgpu_dpm_reset_sdma_is_supported(struct amdgpu_device *adev);
int amdgpu_dpm_reset_vcn(struct amdgpu_device *adev, uint32_t inst_mask);
+bool amdgpu_dpm_reset_vcn_is_supported(struct amdgpu_device *adev);
+bool amdgpu_dpm_is_temp_metrics_supported(struct amdgpu_device *adev,
+ enum smu_temp_metric_type type);
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index b47cb4a5f488..c5965924e7c6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -766,6 +766,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
case IP_VERSION(13, 0, 14):
case IP_VERSION(13, 0, 12):
smu_v13_0_6_set_ppt_funcs(smu);
+ smu_v13_0_6_set_temp_funcs(smu);
/* Enable pp_od_clk_voltage node */
smu->od_enabled = true;
break;
@@ -3831,6 +3832,51 @@ int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type,
return ret;
}
+static ssize_t smu_sys_get_temp_metrics(void *handle, enum smu_temp_metric_type type, void *table)
+{
+ struct smu_context *smu = handle;
+ struct smu_table_context *smu_table = &smu->smu_table;
+ struct smu_table *tables = smu_table->tables;
+ enum smu_table_id table_id;
+
+ if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+ return -EOPNOTSUPP;
+
+ if (!smu->smu_temp.temp_funcs || !smu->smu_temp.temp_funcs->get_temp_metrics)
+ return -EOPNOTSUPP;
+
+ table_id = smu_metrics_get_temp_table_id(type);
+
+ if (table_id == SMU_TABLE_COUNT)
+ return -EINVAL;
+
+ /* If the request is to get size alone, return the cached table size */
+ if (!table && tables[table_id].cache.size)
+ return tables[table_id].cache.size;
+
+ if (smu_table_cache_is_valid(&tables[table_id])) {
+ memcpy(table, tables[table_id].cache.buffer,
+ tables[table_id].cache.size);
+ return tables[table_id].cache.size;
+ }
+
+ return smu->smu_temp.temp_funcs->get_temp_metrics(smu, type, table);
+}
+
+static bool smu_temp_metrics_is_supported(void *handle, enum smu_temp_metric_type type)
+{
+ struct smu_context *smu = handle;
+ bool ret = false;
+
+ if (!smu->pm_enabled)
+ return false;
+
+ if (smu->smu_temp.temp_funcs && smu->smu_temp.temp_funcs->temp_metrics_is_supported)
+ ret = smu->smu_temp.temp_funcs->temp_metrics_is_supported(smu, type);
+
+ return ret;
+}
+
static ssize_t smu_sys_get_xcp_metrics(void *handle, int xcp_id, void *table)
{
struct smu_context *smu = handle;
@@ -3903,6 +3949,8 @@ static const struct amd_pm_funcs swsmu_pm_funcs = {
.get_dpm_clock_table = smu_get_dpm_clock_table,
.get_smu_prv_buf_details = smu_get_prv_buffer_details,
.get_xcp_metrics = smu_sys_get_xcp_metrics,
+ .get_temp_metrics = smu_sys_get_temp_metrics,
+ .temp_metrics_is_supported = smu_temp_metrics_is_supported,
};
int smu_wait_for_event(struct smu_context *smu, enum smu_event_type event,
@@ -4076,6 +4124,16 @@ int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
return ret;
}
+bool smu_reset_vcn_is_supported(struct smu_context *smu)
+{
+ bool ret = false;
+
+ if (smu->ppt_funcs && smu->ppt_funcs->reset_vcn_is_supported)
+ ret = smu->ppt_funcs->reset_vcn_is_supported(smu);
+
+ return ret;
+}
+
int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask)
{
if (smu->ppt_funcs && smu->ppt_funcs->dpm_reset_vcn)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index b52e194397e2..5dd49eca598d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -249,6 +249,14 @@ struct smu_user_dpm_profile {
tables[table_id].domain = d; \
} while (0)
+struct smu_table_cache {
+ void *buffer;
+ size_t size;
+ /* interval in ms*/
+ uint32_t interval;
+ unsigned long last_cache_time;
+};
+
struct smu_table {
uint64_t size;
uint32_t align;
@@ -257,6 +265,7 @@ struct smu_table {
void *cpu_addr;
struct amdgpu_bo *bo;
uint32_t version;
+ struct smu_table_cache cache;
};
enum smu_perf_level_designation {
@@ -322,6 +331,8 @@ enum smu_table_id {
SMU_TABLE_ECCINFO,
SMU_TABLE_COMBO_PPTABLE,
SMU_TABLE_WIFIBAND,
+ SMU_TABLE_GPUBOARD_TEMP_METRICS,
+ SMU_TABLE_BASEBOARD_TEMP_METRICS,
SMU_TABLE_COUNT,
};
@@ -396,6 +407,10 @@ struct smu_dpm_context {
struct smu_dpm_policy_ctxt *dpm_policies;
};
+struct smu_temp_context {
+ const struct smu_temp_funcs *temp_funcs;
+};
+
struct smu_power_gate {
bool uvd_gated;
bool vce_gated;
@@ -529,6 +544,7 @@ struct smu_context {
struct smu_table_context smu_table;
struct smu_dpm_context smu_dpm;
struct smu_power_context smu_power;
+ struct smu_temp_context smu_temp;
struct smu_feature smu_feature;
struct amd_pp_display_configuration *display_config;
struct smu_baco_context smu_baco;
@@ -624,6 +640,28 @@ struct smu_context {
struct i2c_adapter;
/**
+ * struct smu_temp_funcs - Callbacks used to get temperature data.
+ */
+struct smu_temp_funcs {
+ /**
+ * @get_temp_metrics: Calibrate voltage/frequency curve to fit the system's
+ * power delivery and voltage margins. Required for adaptive
+ * @type Temperature metrics type(baseboard/gpuboard)
+ * Return: Size of &table
+ */
+ ssize_t (*get_temp_metrics)(struct smu_context *smu,
+ enum smu_temp_metric_type type, void *table);
+
+ /**
+ * @temp_metrics_is_support: Get if specific temperature metrics is supported
+ * @type Temperature metrics type(baseboard/gpuboard)
+ * Return: true if supported else false
+ */
+ bool (*temp_metrics_is_supported)(struct smu_context *smu, enum smu_temp_metric_type type);
+
+};
+
+/**
* struct pptable_funcs - Callbacks used to interact with the SMU.
*/
struct pptable_funcs {
@@ -1397,6 +1435,10 @@ struct pptable_funcs {
* @reset_vcn: message SMU to soft reset vcn instance.
*/
int (*dpm_reset_vcn)(struct smu_context *smu, uint32_t inst_mask);
+ /**
+ * @reset_vcn_is_supported: Check if support resets vcn.
+ */
+ bool (*reset_vcn_is_supported)(struct smu_context *smu);
/**
* @get_ecc_table: message SMU to get ECC INFO table.
@@ -1622,6 +1664,71 @@ typedef struct {
struct smu_dpm_policy *smu_get_pm_policy(struct smu_context *smu,
enum pp_pm_policy p_type);
+static inline enum smu_table_id
+smu_metrics_get_temp_table_id(enum smu_temp_metric_type type)
+{
+ switch (type) {
+ case SMU_TEMP_METRIC_BASEBOARD:
+ return SMU_TABLE_BASEBOARD_TEMP_METRICS;
+ case SMU_TEMP_METRIC_GPUBOARD:
+ return SMU_TABLE_GPUBOARD_TEMP_METRICS;
+ default:
+ return SMU_TABLE_COUNT;
+ }
+
+ return SMU_TABLE_COUNT;
+}
+
+static inline void smu_table_cache_update_time(struct smu_table *table,
+ unsigned long time)
+{
+ table->cache.last_cache_time = time;
+}
+
+static inline bool smu_table_cache_is_valid(struct smu_table *table)
+{
+ if (!table->cache.buffer || !table->cache.last_cache_time ||
+ !table->cache.interval || !table->cache.size ||
+ time_after(jiffies,
+ table->cache.last_cache_time +
+ msecs_to_jiffies(table->cache.interval)))
+ return false;
+
+ return true;
+}
+
+static inline int smu_table_cache_init(struct smu_context *smu,
+ enum smu_table_id table_id, size_t size,
+ uint32_t cache_interval)
+{
+ struct smu_table_context *smu_table = &smu->smu_table;
+ struct smu_table *tables = smu_table->tables;
+
+ tables[table_id].cache.buffer = kzalloc(size, GFP_KERNEL);
+ if (!tables[table_id].cache.buffer)
+ return -ENOMEM;
+
+ tables[table_id].cache.last_cache_time = 0;
+ tables[table_id].cache.interval = cache_interval;
+ tables[table_id].cache.size = size;
+
+ return 0;
+}
+
+static inline void smu_table_cache_fini(struct smu_context *smu,
+ enum smu_table_id table_id)
+{
+ struct smu_table_context *smu_table = &smu->smu_table;
+ struct smu_table *tables = smu_table->tables;
+
+ if (tables[table_id].cache.buffer) {
+ kfree(tables[table_id].cache.buffer);
+ tables[table_id].cache.buffer = NULL;
+ tables[table_id].cache.last_cache_time = 0;
+ tables[table_id].cache.interval = 0;
+ }
+}
+
#if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4)
int smu_get_power_limit(void *handle,
uint32_t *limit,
@@ -1673,6 +1780,7 @@ int smu_send_rma_reason(struct smu_context *smu);
int smu_reset_sdma(struct smu_context *smu, uint32_t inst_mask);
bool smu_reset_sdma_is_supported(struct smu_context *smu);
int smu_reset_vcn(struct smu_context *smu, uint32_t inst_mask);
+bool smu_reset_vcn_is_supported(struct smu_context *smu);
int smu_set_pm_policy(struct smu_context *smu, enum pp_pm_policy p_type,
int level);
ssize_t smu_get_pm_policy_info(struct smu_context *smu,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h
index 0a2ca544f4e3..1c407a8e96ee 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_pmfw.h
@@ -135,7 +135,63 @@ typedef enum {
GFX_DVM_MARGIN_COUNT
} GFX_DVM_MARGIN_e;
-#define SMU_METRICS_TABLE_VERSION 0x13
+typedef enum{
+ SYSTEM_TEMP_UBB_FPGA,
+ SYSTEM_TEMP_UBB_FRONT,
+ SYSTEM_TEMP_UBB_BACK,
+ SYSTEM_TEMP_UBB_OAM7,
+ SYSTEM_TEMP_UBB_IBC,
+ SYSTEM_TEMP_UBB_UFPGA,
+ SYSTEM_TEMP_UBB_OAM1,
+ SYSTEM_TEMP_OAM_0_1_HSC,
+ SYSTEM_TEMP_OAM_2_3_HSC,
+ SYSTEM_TEMP_OAM_4_5_HSC,
+ SYSTEM_TEMP_OAM_6_7_HSC,
+ SYSTEM_TEMP_UBB_FPGA_0V72_VR,
+ SYSTEM_TEMP_UBB_FPGA_3V3_VR,
+ SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR,
+ SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR,
+ SYSTEM_TEMP_RETIMER_0_1_0V9_VR,
+ SYSTEM_TEMP_RETIMER_4_5_0V9_VR,
+ SYSTEM_TEMP_RETIMER_2_3_0V9_VR,
+ SYSTEM_TEMP_RETIMER_6_7_0V9_VR,
+ SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR,
+ SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR,
+ SYSTEM_TEMP_IBC_HSC,
+ SYSTEM_TEMP_IBC,
+ SYSTEM_TEMP_MAX_ENTRIES = 32
+} SYSTEM_TEMP_e;
+
+typedef enum{
+ NODE_TEMP_RETIMER,
+ NODE_TEMP_IBC_TEMP,
+ NODE_TEMP_IBC_2_TEMP,
+ NODE_TEMP_VDD18_VR_TEMP,
+ NODE_TEMP_04_HBM_B_VR_TEMP,
+ NODE_TEMP_04_HBM_D_VR_TEMP,
+ NODE_TEMP_MAX_TEMP_ENTRIES = 12
+} NODE_TEMP_e;
+
+typedef enum {
+ SVI_VDDCR_VDD0_TEMP,
+ SVI_VDDCR_VDD1_TEMP,
+ SVI_VDDCR_VDD2_TEMP,
+ SVI_VDDCR_VDD3_TEMP,
+ SVI_VDDCR_SOC_A_TEMP,
+ SVI_VDDCR_SOC_C_TEMP,
+ SVI_VDDCR_SOCIO_A_TEMP,
+ SVI_VDDCR_SOCIO_C_TEMP,
+ SVI_VDD_085_HBM_TEMP,
+ SVI_VDDCR_11_HBM_B_TEMP,
+ SVI_VDDCR_11_HBM_D_TEMP,
+ SVI_VDD_USR_TEMP,
+ SVI_VDDIO_11_E32_TEMP,
+ SVI_MAX_TEMP_ENTRIES, // 13
+} SVI_TEMP_e;
+
+#define SMU_METRICS_TABLE_VERSION 0x14
+
+#define SMU_SYSTEM_METRICS_TABLE_VERSION 0x0
typedef struct __attribute__((packed, aligned(4))) {
uint64_t AccumulationCounter;
@@ -231,11 +287,27 @@ typedef struct __attribute__((packed, aligned(4))) {
uint64_t GfxclkBelowHostLimitThmAcc[8];
uint64_t GfxclkBelowHostLimitTotalAcc[8];
uint64_t GfxclkLowUtilizationAcc[8];
+
+ uint32_t AidTemperature[4];
+ uint32_t XcdTemperature[8];
+ uint32_t HbmTemperature[8];
} MetricsTable_t;
#define SMU_VF_METRICS_TABLE_MASK (1 << 31)
#define SMU_VF_METRICS_TABLE_VERSION (0x6 | SMU_VF_METRICS_TABLE_MASK)
+#pragma pack(push, 4)
+typedef struct {
+ uint64_t AccumulationCounter; // Last update timestamp
+ uint16_t LabelVersion; // Defaults to 0.
+ uint16_t NodeIdentifier; // Unique identifier to each node on system.
+ int16_t SystemTemperatures[SYSTEM_TEMP_MAX_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF
+ int16_t NodeTemperatures[NODE_TEMP_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius, unused fields are set to 0xFFFF
+ int16_t VrTemperatures[SVI_MAX_TEMP_ENTRIES]; // Signed integer temperature value in Celsius
+ int16_t spare[3];
+} SystemMetricsTable_t;
+#pragma pack(pop)
+
typedef struct __attribute__((packed, aligned(4))) {
uint32_t AccumulationCounter;
uint32_t InstGfxclk_TargFreq;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
index e1f490b6ce64..aff2776a8b6f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_12_ppsmc.h
@@ -116,7 +116,11 @@
#define PPSMC_MSG_DumpErrorRecord 0x57
#define PPSMC_MSG_EraseRasTable 0x58
#define PPSMC_MSG_GetStaticMetricsTable 0x59
-#define PPSMC_Message_Count 0x5A
+#define PPSMC_MSG_ResetVfArbitersByIndex 0x5A
+#define PPSMC_MSG_GetBadPageSeverity 0x5B
+#define PPSMC_MSG_GetSystemMetricsTable 0x5C
+#define PPSMC_MSG_GetSystemMetricsVersion 0x5D
+#define PPSMC_Message_Count 0x5E
//PPSMC Reset Types for driver msg argument
#define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
index 41f268313613..63a088ef7169 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_6_ppsmc.h
@@ -94,9 +94,9 @@
#define PPSMC_MSG_RmaDueToBadPageThreshold 0x43
#define PPSMC_MSG_SetThrottlingPolicy 0x44
#define PPSMC_MSG_ResetSDMA 0x4D
-#define PPSMC_MSG_ResetVCN 0x4E
#define PPSMC_MSG_GetStaticMetricsTable 0x59
-#define PPSMC_Message_Count 0x5A
+#define PPSMC_MSG_ResetVCN 0x5B
+#define PPSMC_Message_Count 0x5C
//PPSMC Reset Types for driver msg argument
#define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index d7a9e41820fa..2256c77da636 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -278,7 +278,8 @@
__SMU_DUMMY_MAP(MALLPowerState), \
__SMU_DUMMY_MAP(ResetSDMA), \
__SMU_DUMMY_MAP(ResetVCN), \
- __SMU_DUMMY_MAP(GetStaticMetricsTable),
+ __SMU_DUMMY_MAP(GetStaticMetricsTable), \
+ __SMU_DUMMY_MAP(GetSystemMetricsTable),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
@@ -469,6 +470,7 @@ enum smu_feature_mask {
/* Message category flags */
#define SMU_MSG_VF_FLAG (1U << 0)
#define SMU_MSG_RAS_PRI (1U << 1)
+#define SMU_MSG_NO_PRECHECK (1U << 2)
/* Firmware capability flags */
#define SMU_FW_CAP_RAS_PRI (1U << 0)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 9ad46f545d15..599eddb5a67d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1897,7 +1897,7 @@ static ssize_t arcturus_get_gpu_metrics(struct smu_context *smu,
ret = smu_cmn_get_metrics_table(smu,
&metrics,
- true);
+ false);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
index c63d2e28954d..b067147b7c41 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
@@ -1781,7 +1781,7 @@ static ssize_t aldebaran_get_gpu_metrics(struct smu_context *smu,
ret = smu_cmn_get_metrics_table(smu,
&metrics,
- true);
+ false);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
index 02a455a31c25..32fd0be05cff 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_12_ppt.c
@@ -83,7 +83,6 @@ const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[SMU_FEATURE_COUNT] =
SMU_13_0_12_FEA_MAP(SMU_FEATURE_PIT_BIT, FEATURE_PIT),
};
-// clang-format off
const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0),
MSG_MAP(GetSmuVersion, PPSMC_MSG_GetSmuVersion, 1),
@@ -106,7 +105,7 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1),
MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0),
MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1),
- MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI),
+ MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK),
MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0),
MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0),
MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0),
@@ -138,8 +137,47 @@ const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[SMU_MSG_MAX_COUNT] =
MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0),
MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0),
MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1),
+ MSG_MAP(GetSystemMetricsTable, PPSMC_MSG_GetSystemMetricsTable, 0),
};
+int smu_v13_0_12_tables_init(struct smu_context *smu)
+{
+ struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics;
+ struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics;
+ struct smu_table_context *smu_table = &smu->smu_table;
+ struct smu_table *tables = smu_table->tables;
+ struct smu_table_cache *cache;
+ int ret;
+
+ ret = smu_table_cache_init(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS,
+ sizeof(*baseboard_temp_metrics), 50);
+ if (ret)
+ return ret;
+ /* Initialize base board temperature metrics */
+ cache = &(tables[SMU_TABLE_BASEBOARD_TEMP_METRICS].cache);
+ baseboard_temp_metrics =
+ (struct amdgpu_baseboard_temp_metrics_v1_0 *) cache->buffer;
+ smu_cmn_init_baseboard_temp_metrics(baseboard_temp_metrics, 1, 0);
+ /* Initialize GPU board temperature metrics */
+ ret = smu_table_cache_init(smu, SMU_TABLE_GPUBOARD_TEMP_METRICS,
+ sizeof(*gpuboard_temp_metrics), 50);
+ if (ret) {
+ smu_table_cache_fini(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS);
+ return ret;
+ }
+ cache = &(tables[SMU_TABLE_GPUBOARD_TEMP_METRICS].cache);
+ gpuboard_temp_metrics = (struct amdgpu_gpuboard_temp_metrics_v1_0 *)cache->buffer;
+ smu_cmn_init_gpuboard_temp_metrics(gpuboard_temp_metrics, 1, 0);
+
+ return 0;
+}
+
+void smu_v13_0_12_tables_fini(struct smu_context *smu)
+{
+ smu_table_cache_fini(smu, SMU_TABLE_BASEBOARD_TEMP_METRICS);
+ smu_table_cache_fini(smu, SMU_TABLE_GPUBOARD_TEMP_METRICS);
+}
+
static int smu_v13_0_12_get_enabled_mask(struct smu_context *smu,
uint64_t *feature_mask)
{
@@ -184,7 +222,8 @@ static int smu_v13_0_12_fru_get_product_info(struct smu_context *smu,
int smu_v13_0_12_get_max_metrics_size(void)
{
- return max(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t));
+ return max3(sizeof(StaticMetricsTable_t), sizeof(MetricsTable_t),
+ sizeof(SystemMetricsTable_t));
}
static void smu_v13_0_12_init_xgmi_data(struct smu_context *smu,
@@ -220,7 +259,7 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu)
struct PPTable_t *pptable =
(struct PPTable_t *)smu_table->driver_pptable;
uint32_t table_version;
- int ret, i;
+ int ret, i, n;
if (!pptable->Init) {
ret = smu_v13_0_6_get_static_metrics_table(smu);
@@ -259,6 +298,22 @@ int smu_v13_0_12_setup_driver_pptable(struct smu_context *smu)
/* use AID0 serial number by default */
pptable->PublicSerialNumber_AID =
static_metrics->PublicSerialNumber_AID[0];
+
+ amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC,
+ 0, pptable->PublicSerialNumber_AID);
+ n = ARRAY_SIZE(static_metrics->PublicSerialNumber_AID);
+ for (i = 0; i < n; i++) {
+ amdgpu_device_set_uid(
+ smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i,
+ static_metrics->PublicSerialNumber_AID[i]);
+ }
+ n = ARRAY_SIZE(static_metrics->PublicSerialNumber_XCD);
+ for (i = 0; i < n; i++) {
+ amdgpu_device_set_uid(
+ smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i,
+ static_metrics->PublicSerialNumber_XCD[i]);
+ }
+
ret = smu_v13_0_12_fru_get_product_info(smu, static_metrics);
if (ret)
return ret;
@@ -359,6 +414,243 @@ int smu_v13_0_12_get_smu_metrics_data(struct smu_context *smu,
return 0;
}
+static int smu_v13_0_12_get_system_metrics_table(struct smu_context *smu,
+ void *metrics_table)
+{
+ struct smu_table_context *smu_table = &smu->smu_table;
+ uint32_t table_size = smu_table->tables[SMU_TABLE_SMU_METRICS].size;
+ struct smu_table *table = &smu_table->driver_table;
+ int ret;
+
+ ret = smu_cmn_send_smc_msg(smu, SMU_MSG_GetSystemMetricsTable, NULL);
+ if (ret) {
+ dev_info(smu->adev->dev,
+ "Failed to export system metrics table!\n");
+ return ret;
+ }
+
+ amdgpu_asic_invalidate_hdp(smu->adev, NULL);
+ memcpy(smu_table->metrics_table, table->cpu_addr, table_size);
+
+ if (metrics_table)
+ memcpy(metrics_table, smu_table->metrics_table, sizeof(SystemMetricsTable_t));
+
+ return 0;
+}
+
+static enum amdgpu_node_temp smu_v13_0_12_get_node_sensor_type(NODE_TEMP_e type)
+{
+ switch (type) {
+ case NODE_TEMP_RETIMER:
+ return AMDGPU_RETIMER_X_TEMP;
+ case NODE_TEMP_IBC_TEMP:
+ return AMDGPU_OAM_X_IBC_TEMP;
+ case NODE_TEMP_IBC_2_TEMP:
+ return AMDGPU_OAM_X_IBC_2_TEMP;
+ case NODE_TEMP_VDD18_VR_TEMP:
+ return AMDGPU_OAM_X_VDD18_VR_TEMP;
+ case NODE_TEMP_04_HBM_B_VR_TEMP:
+ return AMDGPU_OAM_X_04_HBM_B_VR_TEMP;
+ case NODE_TEMP_04_HBM_D_VR_TEMP:
+ return AMDGPU_OAM_X_04_HBM_D_VR_TEMP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static enum amdgpu_vr_temp smu_v13_0_12_get_vr_sensor_type(SVI_TEMP_e type)
+{
+ switch (type) {
+ case SVI_VDDCR_VDD0_TEMP:
+ return AMDGPU_VDDCR_VDD0_TEMP;
+ case SVI_VDDCR_VDD1_TEMP:
+ return AMDGPU_VDDCR_VDD1_TEMP;
+ case SVI_VDDCR_VDD2_TEMP:
+ return AMDGPU_VDDCR_VDD2_TEMP;
+ case SVI_VDDCR_VDD3_TEMP:
+ return AMDGPU_VDDCR_VDD3_TEMP;
+ case SVI_VDDCR_SOC_A_TEMP:
+ return AMDGPU_VDDCR_SOC_A_TEMP;
+ case SVI_VDDCR_SOC_C_TEMP:
+ return AMDGPU_VDDCR_SOC_C_TEMP;
+ case SVI_VDDCR_SOCIO_A_TEMP:
+ return AMDGPU_VDDCR_SOCIO_A_TEMP;
+ case SVI_VDDCR_SOCIO_C_TEMP:
+ return AMDGPU_VDDCR_SOCIO_C_TEMP;
+ case SVI_VDD_085_HBM_TEMP:
+ return AMDGPU_VDD_085_HBM_TEMP;
+ case SVI_VDDCR_11_HBM_B_TEMP:
+ return AMDGPU_VDDCR_11_HBM_B_TEMP;
+ case SVI_VDDCR_11_HBM_D_TEMP:
+ return AMDGPU_VDDCR_11_HBM_D_TEMP;
+ case SVI_VDD_USR_TEMP:
+ return AMDGPU_VDD_USR_TEMP;
+ case SVI_VDDIO_11_E32_TEMP:
+ return AMDGPU_VDDIO_11_E32_TEMP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static enum amdgpu_system_temp smu_v13_0_12_get_system_sensor_type(SYSTEM_TEMP_e type)
+{
+ switch (type) {
+ case SYSTEM_TEMP_UBB_FPGA:
+ return AMDGPU_UBB_FPGA_TEMP;
+ case SYSTEM_TEMP_UBB_FRONT:
+ return AMDGPU_UBB_FRONT_TEMP;
+ case SYSTEM_TEMP_UBB_BACK:
+ return AMDGPU_UBB_BACK_TEMP;
+ case SYSTEM_TEMP_UBB_OAM7:
+ return AMDGPU_UBB_OAM7_TEMP;
+ case SYSTEM_TEMP_UBB_IBC:
+ return AMDGPU_UBB_IBC_TEMP;
+ case SYSTEM_TEMP_UBB_UFPGA:
+ return AMDGPU_UBB_UFPGA_TEMP;
+ case SYSTEM_TEMP_UBB_OAM1:
+ return AMDGPU_UBB_OAM1_TEMP;
+ case SYSTEM_TEMP_OAM_0_1_HSC:
+ return AMDGPU_OAM_0_1_HSC_TEMP;
+ case SYSTEM_TEMP_OAM_2_3_HSC:
+ return AMDGPU_OAM_2_3_HSC_TEMP;
+ case SYSTEM_TEMP_OAM_4_5_HSC:
+ return AMDGPU_OAM_4_5_HSC_TEMP;
+ case SYSTEM_TEMP_OAM_6_7_HSC:
+ return AMDGPU_OAM_6_7_HSC_TEMP;
+ case SYSTEM_TEMP_UBB_FPGA_0V72_VR:
+ return AMDGPU_UBB_FPGA_0V72_VR_TEMP;
+ case SYSTEM_TEMP_UBB_FPGA_3V3_VR:
+ return AMDGPU_UBB_FPGA_3V3_VR_TEMP;
+ case SYSTEM_TEMP_RETIMER_0_1_2_3_1V2_VR:
+ return AMDGPU_RETIMER_0_1_2_3_1V2_VR_TEMP;
+ case SYSTEM_TEMP_RETIMER_4_5_6_7_1V2_VR:
+ return AMDGPU_RETIMER_4_5_6_7_1V2_VR_TEMP;
+ case SYSTEM_TEMP_RETIMER_0_1_0V9_VR:
+ return AMDGPU_RETIMER_0_1_0V9_VR_TEMP;
+ case SYSTEM_TEMP_RETIMER_4_5_0V9_VR:
+ return AMDGPU_RETIMER_4_5_0V9_VR_TEMP;
+ case SYSTEM_TEMP_RETIMER_2_3_0V9_VR:
+ return AMDGPU_RETIMER_2_3_0V9_VR_TEMP;
+ case SYSTEM_TEMP_RETIMER_6_7_0V9_VR:
+ return AMDGPU_RETIMER_6_7_0V9_VR_TEMP;
+ case SYSTEM_TEMP_OAM_0_1_2_3_3V3_VR:
+ return AMDGPU_OAM_0_1_2_3_3V3_VR_TEMP;
+ case SYSTEM_TEMP_OAM_4_5_6_7_3V3_VR:
+ return AMDGPU_OAM_4_5_6_7_3V3_VR_TEMP;
+ case SYSTEM_TEMP_IBC_HSC:
+ return AMDGPU_IBC_HSC_TEMP;
+ case SYSTEM_TEMP_IBC:
+ return AMDGPU_IBC_TEMP;
+ default:
+ return -EINVAL;
+ }
+}
+
+static bool smu_v13_0_12_is_temp_metrics_supported(struct smu_context *smu,
+ enum smu_temp_metric_type type)
+{
+ switch (type) {
+ case SMU_TEMP_METRIC_BASEBOARD:
+ if (smu->adev->gmc.xgmi.physical_node_id == 0 &&
+ smu->adev->gmc.xgmi.num_physical_nodes > 1 &&
+ smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS)))
+ return true;
+ break;
+ case SMU_TEMP_METRIC_GPUBOARD:
+ return smu_v13_0_6_cap_supported(smu, SMU_CAP(TEMP_METRICS));
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static ssize_t smu_v13_0_12_get_temp_metrics(struct smu_context *smu,
+ enum smu_temp_metric_type type, void *table)
+{
+ struct amdgpu_baseboard_temp_metrics_v1_0 *baseboard_temp_metrics;
+ struct amdgpu_gpuboard_temp_metrics_v1_0 *gpuboard_temp_metrics;
+ struct smu_table_context *smu_table = &smu->smu_table;
+ SystemMetricsTable_t *metrics =
+ (SystemMetricsTable_t *)smu_table->metrics_table;
+
+ struct smu_table *data_table;
+ int ret, sensor_type;
+ u32 idx, sensors;
+ ssize_t size;
+
+ if (type == SMU_TEMP_METRIC_BASEBOARD) {
+ /* Initialize base board temperature metrics */
+ data_table =
+ &smu->smu_table.tables[SMU_TABLE_BASEBOARD_TEMP_METRICS];
+ baseboard_temp_metrics =
+ (struct amdgpu_baseboard_temp_metrics_v1_0 *)
+ data_table->cache.buffer;
+ size = sizeof(*baseboard_temp_metrics);
+ } else {
+ data_table =
+ &smu->smu_table.tables[SMU_TABLE_GPUBOARD_TEMP_METRICS];
+ gpuboard_temp_metrics =
+ (struct amdgpu_gpuboard_temp_metrics_v1_0 *)
+ data_table->cache.buffer;
+ size = sizeof(*baseboard_temp_metrics);
+ }
+
+ ret = smu_v13_0_12_get_system_metrics_table(smu, NULL);
+ if (ret)
+ return ret;
+
+ smu_table_cache_update_time(data_table, jiffies);
+
+ if (type == SMU_TEMP_METRIC_GPUBOARD) {
+ gpuboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter;
+ gpuboard_temp_metrics->label_version = metrics->LabelVersion;
+ gpuboard_temp_metrics->node_id = metrics->NodeIdentifier;
+
+ idx = 0;
+ for (sensors = 0; sensors < NODE_TEMP_MAX_TEMP_ENTRIES; sensors++) {
+ if (metrics->NodeTemperatures[sensors] != -1) {
+ sensor_type = smu_v13_0_12_get_node_sensor_type(sensors);
+ gpuboard_temp_metrics->node_temp[idx] =
+ ((int)metrics->NodeTemperatures[sensors]) & 0xFFFFFF;
+ gpuboard_temp_metrics->node_temp[idx] |= (sensor_type << 24);
+ idx++;
+ }
+ }
+
+ idx = 0;
+
+ for (sensors = 0; sensors < SVI_MAX_TEMP_ENTRIES; sensors++) {
+ if (metrics->VrTemperatures[sensors] != -1) {
+ sensor_type = smu_v13_0_12_get_vr_sensor_type(sensors);
+ gpuboard_temp_metrics->vr_temp[idx] =
+ ((int)metrics->VrTemperatures[sensors]) & 0xFFFFFF;
+ gpuboard_temp_metrics->vr_temp[idx] |= (sensor_type << 24);
+ idx++;
+ }
+ }
+ } else if (type == SMU_TEMP_METRIC_BASEBOARD) {
+ baseboard_temp_metrics->accumulation_counter = metrics->AccumulationCounter;
+ baseboard_temp_metrics->label_version = metrics->LabelVersion;
+ baseboard_temp_metrics->node_id = metrics->NodeIdentifier;
+
+ idx = 0;
+ for (sensors = 0; sensors < SYSTEM_TEMP_MAX_ENTRIES; sensors++) {
+ if (metrics->SystemTemperatures[sensors] != -1) {
+ sensor_type = smu_v13_0_12_get_system_sensor_type(sensors);
+ baseboard_temp_metrics->system_temp[idx] =
+ ((int)metrics->SystemTemperatures[sensors]) & 0xFFFFFF;
+ baseboard_temp_metrics->system_temp[idx] |= (sensor_type << 24);
+ idx++;
+ }
+ }
+ }
+
+ memcpy(table, data_table->cache.buffer, size);
+
+ return size;
+}
+
ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu, struct amdgpu_xcp *xcp, void *table, void *smu_metrics)
{
const u8 num_jpeg_rings = NUM_JPEG_RINGS_FW;
@@ -572,3 +864,8 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void
return sizeof(*gpu_metrics);
}
+
+const struct smu_temp_funcs smu_v13_0_12_temp_funcs = {
+ .temp_metrics_is_supported = smu_v13_0_12_is_temp_metrics_supported,
+ .get_temp_metrics = smu_v13_0_12_get_temp_metrics,
+};
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 9cc294f4708b..e37b7b5358ea 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -145,7 +145,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(GetDpmFreqByIndex, PPSMC_MSG_GetDpmFreqByIndex, 1),
MSG_MAP(SetPptLimit, PPSMC_MSG_SetPptLimit, 0),
MSG_MAP(GetPptLimit, PPSMC_MSG_GetPptLimit, 1),
- MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI),
+ MSG_MAP(GfxDeviceDriverReset, PPSMC_MSG_GfxDriverReset, SMU_MSG_RAS_PRI | SMU_MSG_NO_PRECHECK),
MSG_MAP(DramLogSetDramAddrHigh, PPSMC_MSG_DramLogSetDramAddrHigh, 0),
MSG_MAP(DramLogSetDramAddrLow, PPSMC_MSG_DramLogSetDramAddrLow, 0),
MSG_MAP(DramLogSetDramSize, PPSMC_MSG_DramLogSetDramSize, 0),
@@ -177,7 +177,7 @@ static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COU
MSG_MAP(SetThrottlingPolicy, PPSMC_MSG_SetThrottlingPolicy, 0),
MSG_MAP(ResetSDMA, PPSMC_MSG_ResetSDMA, 0),
MSG_MAP(ResetVCN, PPSMC_MSG_ResetVCN, 0),
- MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 0),
+ MSG_MAP(GetStaticMetricsTable, PPSMC_MSG_GetStaticMetricsTable, 1),
};
// clang-format on
@@ -312,6 +312,8 @@ static void smu_v13_0_14_init_caps(struct smu_context *smu)
smu_v13_0_6_cap_set(smu, SMU_CAP(PER_INST_METRICS));
if (fw_ver >= 0x5551200)
smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
+ if (fw_ver >= 0x5551800)
+ smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
if (fw_ver >= 0x5551600) {
smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS));
smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
@@ -350,6 +352,13 @@ static void smu_v13_0_12_init_caps(struct smu_context *smu)
smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
}
+
+ if (fw_ver >= 0x04560700) {
+ if (!amdgpu_sriov_vf(smu->adev))
+ smu_v13_0_6_cap_set(smu, SMU_CAP(TEMP_METRICS));
+ } else {
+ smu_v13_0_12_tables_fini(smu);
+ }
}
static void smu_v13_0_6_init_caps(struct smu_context *smu)
@@ -402,19 +411,37 @@ static void smu_v13_0_6_init_caps(struct smu_context *smu)
if ((pgm == 7 && fw_ver >= 0x7550E00) ||
(pgm == 0 && fw_ver >= 0x00557E00))
smu_v13_0_6_cap_set(smu, SMU_CAP(HST_LIMIT_METRICS));
- if ((pgm == 0 && fw_ver >= 0x00557F01) ||
- (pgm == 7 && fw_ver >= 0x7551000)) {
- smu_v13_0_6_cap_set(smu, SMU_CAP(STATIC_METRICS));
- smu_v13_0_6_cap_set(smu, SMU_CAP(BOARD_VOLTAGE));
+
+ if (amdgpu_sriov_vf(adev)) {
+ if ((pgm == 0 && fw_ver >= 0x00558000) ||
+ (pgm == 7 && fw_ver >= 0x7551000)) {
+ smu_v13_0_6_cap_set(smu,
+ SMU_CAP(STATIC_METRICS));
+ smu_v13_0_6_cap_set(smu,
+ SMU_CAP(BOARD_VOLTAGE));
+ smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
+ }
+ } else {
+ if ((pgm == 0 && fw_ver >= 0x00557F01) ||
+ (pgm == 7 && fw_ver >= 0x7551000)) {
+ smu_v13_0_6_cap_set(smu,
+ SMU_CAP(STATIC_METRICS));
+ smu_v13_0_6_cap_set(smu,
+ SMU_CAP(BOARD_VOLTAGE));
+ }
+ if ((pgm == 0 && fw_ver >= 0x00558000) ||
+ (pgm == 7 && fw_ver >= 0x7551000))
+ smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
}
- if ((pgm == 0 && fw_ver >= 0x00558000) ||
- (pgm == 7 && fw_ver >= 0x7551000))
- smu_v13_0_6_cap_set(smu, SMU_CAP(PLDM_VERSION));
}
if (((pgm == 7) && (fw_ver >= 0x7550700)) ||
((pgm == 0) && (fw_ver >= 0x00557900)) ||
((pgm == 4) && (fw_ver >= 0x4557000)))
smu_v13_0_6_cap_set(smu, SMU_CAP(SDMA_RESET));
+
+ if (((pgm == 0) && (fw_ver >= 0x00558200)) ||
+ ((pgm == 4) && (fw_ver >= 0x04557100)))
+ smu_v13_0_6_cap_set(smu, SMU_CAP(VCN_RESET));
}
static void smu_v13_0_x_init_caps(struct smu_context *smu)
@@ -511,8 +538,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
{
struct smu_table_context *smu_table = &smu->smu_table;
struct smu_table *tables = smu_table->tables;
+ void *gpu_metrics_table __free(kfree) = NULL;
+ void *driver_pptable __free(kfree) = NULL;
+ void *metrics_table __free(kfree) = NULL;
struct amdgpu_device *adev = smu->adev;
int gpu_metrcs_size = METRICS_TABLE_SIZE;
+ int ret;
if (!(adev->flags & AMD_IS_APU))
SMU_TABLE_INIT(tables, SMU_TABLE_PMSTATUSLOG, SMU13_TOOL_SIZE,
@@ -528,27 +559,32 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
- smu_table->metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL);
- if (!smu_table->metrics_table)
+ metrics_table = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL);
+ if (!metrics_table)
return -ENOMEM;
smu_table->metrics_time = 0;
smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v1_8);
- smu_table->gpu_metrics_table =
+ gpu_metrics_table =
kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
- if (!smu_table->gpu_metrics_table) {
- kfree(smu_table->metrics_table);
+ if (!gpu_metrics_table)
return -ENOMEM;
- }
- smu_table->driver_pptable =
- kzalloc(sizeof(struct PPTable_t), GFP_KERNEL);
- if (!smu_table->driver_pptable) {
- kfree(smu_table->metrics_table);
- kfree(smu_table->gpu_metrics_table);
+ driver_pptable = kzalloc(sizeof(struct PPTable_t), GFP_KERNEL);
+ if (!driver_pptable)
return -ENOMEM;
+
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 12)) {
+ ret = smu_v13_0_12_tables_init(smu);
+ if (ret)
+ return ret;
}
+ smu_table->gpu_metrics_table = no_free_ptr(gpu_metrics_table);
+ smu_table->metrics_table = no_free_ptr(metrics_table);
+ smu_table->driver_pptable = no_free_ptr(driver_pptable);
+
return 0;
}
@@ -677,6 +713,13 @@ static int smu_v13_0_6_init_smc_tables(struct smu_context *smu)
return ret;
}
+static int smu_v13_0_6_fini_smc_tables(struct smu_context *smu)
+{
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12))
+ smu_v13_0_12_tables_fini(smu);
+ return smu_v13_0_fini_smc_tables(smu);
+}
+
static int smu_v13_0_6_get_allowed_feature_mask(struct smu_context *smu,
uint32_t *feature_mask,
uint32_t num)
@@ -803,7 +846,7 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
struct PPTable_t *pptable =
(struct PPTable_t *)smu_table->driver_pptable;
int version = smu_v13_0_6_get_metrics_version(smu);
- int ret, i, retry = 100;
+ int ret, i, retry = 100, n;
uint32_t table_version;
uint16_t max_speed;
uint8_t max_width;
@@ -865,6 +908,23 @@ static int smu_v13_0_6_setup_driver_pptable(struct smu_context *smu)
pptable->PublicSerialNumber_AID =
GET_METRIC_FIELD(PublicSerialNumber_AID, version)[0];
+ amdgpu_device_set_uid(smu->adev->uid_info, AMDGPU_UID_TYPE_SOC,
+ 0, pptable->PublicSerialNumber_AID);
+ n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_AID);
+ for (i = 0; i < n; i++) {
+ amdgpu_device_set_uid(
+ smu->adev->uid_info, AMDGPU_UID_TYPE_AID, i,
+ GET_METRIC_FIELD(PublicSerialNumber_AID,
+ version)[i]);
+ }
+ n = ARRAY_SIZE(metrics_v0->PublicSerialNumber_XCD);
+ for (i = 0; i < n; i++) {
+ amdgpu_device_set_uid(
+ smu->adev->uid_info, AMDGPU_UID_TYPE_XCD, i,
+ GET_METRIC_FIELD(PublicSerialNumber_XCD,
+ version)[i]);
+ }
+
pptable->Init = true;
if (smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) {
ret = smu_v13_0_6_get_static_metrics_table(smu);
@@ -2560,9 +2620,9 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id,
const u8 num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3;
int version = smu_v13_0_6_get_metrics_version(smu);
struct amdgpu_partition_metrics_v1_0 *xcp_metrics;
+ MetricsTableV0_t *metrics_v0 __free(kfree) = NULL;
struct amdgpu_device *adev = smu->adev;
int ret, inst, i, j, k, idx;
- MetricsTableV0_t *metrics_v0;
MetricsTableV1_t *metrics_v1;
MetricsTableV2_t *metrics_v2;
struct amdgpu_xcp *xcp;
@@ -2587,17 +2647,14 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id,
return -ENOMEM;
ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false);
- if (ret) {
- kfree(metrics_v0);
+ if (ret)
return ret;
- }
if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) ==
IP_VERSION(13, 0, 12) &&
- smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) {
- ret = smu_v13_0_12_get_xcp_metrics(smu, xcp, table, metrics_v0);
- goto out;
- }
+ smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS)))
+ return smu_v13_0_12_get_xcp_metrics(smu, xcp, table,
+ metrics_v0);
metrics_v1 = (MetricsTableV1_t *)metrics_v0;
metrics_v2 = (MetricsTableV2_t *)metrics_v0;
@@ -2668,8 +2725,6 @@ static ssize_t smu_v13_0_6_get_xcp_metrics(struct smu_context *smu, int xcp_id,
idx++;
}
}
-out:
- kfree(metrics_v0);
return sizeof(*xcp_metrics);
}
@@ -2680,31 +2735,26 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
struct gpu_metrics_v1_8 *gpu_metrics =
(struct gpu_metrics_v1_8 *)smu_table->gpu_metrics_table;
int version = smu_v13_0_6_get_metrics_version(smu);
+ MetricsTableV0_t *metrics_v0 __free(kfree) = NULL;
int ret = 0, xcc_id, inst, i, j, k, idx;
struct amdgpu_device *adev = smu->adev;
- MetricsTableV0_t *metrics_v0;
MetricsTableV1_t *metrics_v1;
MetricsTableV2_t *metrics_v2;
struct amdgpu_xcp *xcp;
u16 link_width_level;
- ssize_t num_bytes;
u8 num_jpeg_rings;
u32 inst_mask;
bool per_inst;
metrics_v0 = kzalloc(METRICS_TABLE_SIZE, GFP_KERNEL);
ret = smu_v13_0_6_get_metrics_table(smu, metrics_v0, false);
- if (ret) {
- kfree(metrics_v0);
+ if (ret)
return ret;
- }
- if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) &&
- smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS))) {
- num_bytes = smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0);
- kfree(metrics_v0);
- return num_bytes;
- }
+ if (amdgpu_ip_version(smu->adev, MP1_HWIP, 0) ==
+ IP_VERSION(13, 0, 12) &&
+ smu_v13_0_6_cap_supported(smu, SMU_CAP(STATIC_METRICS)))
+ return smu_v13_0_12_get_gpu_metrics(smu, table, metrics_v0);
metrics_v1 = (MetricsTableV1_t *)metrics_v0;
metrics_v2 = (MetricsTableV2_t *)metrics_v0;
@@ -2890,7 +2940,6 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
gpu_metrics->firmware_timestamp = GET_METRIC_FIELD(Timestamp, version);
*table = (void *)gpu_metrics;
- kfree(metrics_v0);
return sizeof(*gpu_metrics);
}
@@ -3076,7 +3125,7 @@ static inline bool smu_v13_0_6_is_link_reset_supported(struct smu_context *smu)
struct amdgpu_device *adev = smu->adev;
int var = (adev->pdev->device & 0xF);
- if (var == 0x1)
+ if (var == 0x0 || var == 0x1 || var == 0x3)
return true;
return false;
@@ -3152,6 +3201,11 @@ static int smu_v13_0_6_reset_sdma(struct smu_context *smu, uint32_t inst_mask)
return ret;
}
+static bool smu_v13_0_6_reset_vcn_is_supported(struct smu_context *smu)
+{
+ return smu_v13_0_6_cap_supported(smu, SMU_CAP(VCN_RESET));
+}
+
static int smu_v13_0_6_reset_vcn(struct smu_context *smu, uint32_t inst_mask)
{
int ret = 0;
@@ -3797,7 +3851,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
.init_microcode = smu_v13_0_6_init_microcode,
.fini_microcode = smu_v13_0_fini_microcode,
.init_smc_tables = smu_v13_0_6_init_smc_tables,
- .fini_smc_tables = smu_v13_0_fini_smc_tables,
+ .fini_smc_tables = smu_v13_0_6_fini_smc_tables,
.init_power = smu_v13_0_init_power,
.fini_power = smu_v13_0_fini_power,
.check_fw_status = smu_v13_0_6_check_fw_status,
@@ -3840,6 +3894,7 @@ static const struct pptable_funcs smu_v13_0_6_ppt_funcs = {
.reset_sdma = smu_v13_0_6_reset_sdma,
.reset_sdma_is_supported = smu_v13_0_6_reset_sdma_is_supported,
.dpm_reset_vcn = smu_v13_0_6_reset_vcn,
+ .reset_vcn_is_supported = smu_v13_0_6_reset_vcn_is_supported,
};
void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
@@ -3857,3 +3912,9 @@ void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu)
amdgpu_mca_smu_init_funcs(smu->adev, &smu_v13_0_6_mca_smu_funcs);
amdgpu_aca_set_smu_funcs(smu->adev, &smu_v13_0_6_aca_smu_funcs);
}
+
+void smu_v13_0_6_set_temp_funcs(struct smu_context *smu)
+{
+ smu->smu_temp.temp_funcs = (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)
+ == IP_VERSION(13, 0, 12)) ? &smu_v13_0_12_temp_funcs : NULL;
+}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
index 67b30674fd31..bcb8246c0804 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.h
@@ -64,14 +64,17 @@ enum smu_v13_0_6_caps {
SMU_CAP(RMA_MSG),
SMU_CAP(ACA_SYND),
SMU_CAP(SDMA_RESET),
+ SMU_CAP(VCN_RESET),
SMU_CAP(STATIC_METRICS),
SMU_CAP(HST_LIMIT_METRICS),
SMU_CAP(BOARD_VOLTAGE),
SMU_CAP(PLDM_VERSION),
+ SMU_CAP(TEMP_METRICS),
SMU_CAP(ALL),
};
extern void smu_v13_0_6_set_ppt_funcs(struct smu_context *smu);
+extern void smu_v13_0_6_set_temp_funcs(struct smu_context *smu);
bool smu_v13_0_6_cap_supported(struct smu_context *smu, enum smu_v13_0_6_caps cap);
int smu_v13_0_6_get_static_metrics_table(struct smu_context *smu);
int smu_v13_0_6_get_metrics_table(struct smu_context *smu, void *metrics_table,
@@ -86,6 +89,9 @@ ssize_t smu_v13_0_12_get_gpu_metrics(struct smu_context *smu, void **table, void
ssize_t smu_v13_0_12_get_xcp_metrics(struct smu_context *smu,
struct amdgpu_xcp *xcp, void *table,
void *smu_metrics);
+int smu_v13_0_12_tables_init(struct smu_context *smu);
+void smu_v13_0_12_tables_fini(struct smu_context *smu);
extern const struct cmn2asic_mapping smu_v13_0_12_feature_mask_map[];
extern const struct cmn2asic_msg_mapping smu_v13_0_12_message_map[];
+extern const struct smu_temp_funcs smu_v13_0_12_temp_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index 3aea32baea3d..f32474af90b3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -1697,9 +1697,11 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu,
uint32_t *min_power_limit)
{
struct smu_table_context *table_context = &smu->smu_table;
+ struct smu_14_0_2_powerplay_table *powerplay_table =
+ table_context->power_play_table;
PPTable_t *pptable = table_context->driver_pptable;
CustomSkuTable_t *skutable = &pptable->CustomSkuTable;
- uint32_t power_limit;
+ uint32_t power_limit, od_percent_upper = 0, od_percent_lower = 0;
uint32_t msg_limit = pptable->SkuTable.MsgLimits.Power[PPT_THROTTLER_PPT0][POWER_SOURCE_AC];
if (smu_v14_0_get_current_power_limit(smu, &power_limit))
@@ -1712,11 +1714,29 @@ static int smu_v14_0_2_get_power_limit(struct smu_context *smu,
if (default_power_limit)
*default_power_limit = power_limit;
- if (max_power_limit)
- *max_power_limit = msg_limit;
+ if (powerplay_table) {
+ if (smu->od_enabled &&
+ smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) {
+ od_percent_upper = pptable->SkuTable.OverDriveLimitsBasicMax.Ppt;
+ od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt;
+ } else if (smu_v14_0_2_is_od_feature_supported(smu, PP_OD_FEATURE_PPT_BIT)) {
+ od_percent_upper = 0;
+ od_percent_lower = pptable->SkuTable.OverDriveLimitsBasicMin.Ppt;
+ }
+ }
+
+ dev_dbg(smu->adev->dev, "od percent upper:%d, od percent lower:%d (default power: %d)\n",
+ od_percent_upper, od_percent_lower, power_limit);
+
+ if (max_power_limit) {
+ *max_power_limit = msg_limit * (100 + od_percent_upper);
+ *max_power_limit /= 100;
+ }
- if (min_power_limit)
- *min_power_limit = 0;
+ if (min_power_limit) {
+ *min_power_limit = power_limit * (100 + od_percent_lower);
+ *min_power_limit /= 100;
+ }
return 0;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 59f9abd0f7b8..f532f7c69259 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -256,11 +256,12 @@ static int __smu_cmn_ras_filter_msg(struct smu_context *smu,
{
struct amdgpu_device *adev = smu->adev;
uint32_t flags, resp;
- bool fed_status;
+ bool fed_status, pri;
flags = __smu_cmn_get_msg_flags(smu, msg);
*poll = true;
+ pri = !!(flags & SMU_MSG_NO_PRECHECK);
/* When there is RAS fatal error, FW won't process non-RAS priority
* messages. Don't allow any messages other than RAS priority messages.
*/
@@ -272,15 +273,18 @@ static int __smu_cmn_ras_filter_msg(struct smu_context *smu,
smu_get_message_name(smu, msg));
return -EACCES;
}
+ }
+ if (pri || fed_status) {
/* FW will ignore non-priority messages when a RAS fatal error
- * is detected. Hence it is possible that a previous message
- * wouldn't have got response. Allow to continue without polling
- * for response status for priority messages.
+ * or reset condition is detected. Hence it is possible that a
+ * previous message wouldn't have got response. Allow to
+ * continue without polling for response status for priority
+ * messages.
*/
resp = RREG32(smu->resp_reg);
dev_dbg(adev->dev,
- "Sending RAS priority message %s response status: %x",
+ "Sending priority message %s response status: %x",
smu_get_message_name(smu, msg), resp);
if (resp == 0)
*poll = false;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index a608cdbdada4..d588f74b98de 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -65,6 +65,32 @@
header->structure_size = sizeof(*tmp); \
} while (0)
+#define smu_cmn_init_baseboard_temp_metrics(ptr, fr, cr) \
+ do { \
+ typecheck(struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *, \
+ (ptr)); \
+ struct amdgpu_baseboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \
+ struct metrics_table_header *header = \
+ (struct metrics_table_header *)tmp; \
+ memset(header, 0xFF, sizeof(*tmp)); \
+ header->format_revision = fr; \
+ header->content_revision = cr; \
+ header->structure_size = sizeof(*tmp); \
+ } while (0)
+
+#define smu_cmn_init_gpuboard_temp_metrics(ptr, fr, cr) \
+ do { \
+ typecheck(struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *, \
+ (ptr)); \
+ struct amdgpu_gpuboard_temp_metrics_v##fr##_##cr *tmp = (ptr); \
+ struct metrics_table_header *header = \
+ (struct metrics_table_header *)tmp; \
+ memset(header, 0xFF, sizeof(*tmp)); \
+ header->format_revision = fr; \
+ header->content_revision = cr; \
+ header->structure_size = sizeof(*tmp); \
+ } while (0)
+
extern const int link_speed[];
/* Helper to Convert from PCIE Gen 1/2/3/4/5/6 to 0.1 GT/s speed units */