diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 31 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 35 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 7 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 31 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c | 5 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 2 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 40 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_events.c | 8 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_events.h | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 6 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_module.c | 1 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_pasid.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 73 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 188 |
17 files changed, 337 insertions, 104 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 24b471734117..dcb1d89d776e 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -91,7 +91,7 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev, (const struct cik_ih_ring_entry *)ih_ring_entry; uint32_t context_id = ihre->data & 0xfffffff; unsigned int vmid = (ihre->ring_id & 0x0000ff00) >> 8; - unsigned int pasid = (ihre->ring_id & 0xffff0000) >> 16; + u32 pasid = (ihre->ring_id & 0xffff0000) >> 16; if (pasid == 0) return; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b7b16adb0615..222f1df1a6b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -97,6 +97,7 @@ void kfd_chardev_exit(void) device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0)); class_destroy(kfd_class); unregister_chrdev(kfd_char_dev_major, kfd_dev_name); + kfd_device = NULL; } struct device *kfd_chardev(void) @@ -1290,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, return -EINVAL; } - if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { - if (args->size != kfd_doorbell_process_slice(dev)) - return -EINVAL; - offset = kfd_get_process_doorbells(dev, p); - } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { - if (args->size != PAGE_SIZE) - return -EINVAL; - offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); - if (!offset) - return -ENOMEM; - } - mutex_lock(&p->mutex); pdd = kfd_bind_process_to_device(dev, p); @@ -1310,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_unlock; } + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { + if (args->size != kfd_doorbell_process_slice(dev)) { + err = -EINVAL; + goto err_unlock; + } + offset = kfd_get_process_doorbells(pdd); + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + if (args->size != PAGE_SIZE) { + err = -EINVAL; + goto err_unlock; + } + offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + if (!offset) { + err = -ENOMEM; + goto err_unlock; + } + } + err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( dev->kgd, args->va_addr, args->size, pdd->vm, (struct kgd_mem **) &mem, &offset, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 3fac06b281ce..5e2254b9e931 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -797,7 +797,8 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) return -ENODATA; } - pcrat_image = kmemdup(crat_table, crat_table->length, GFP_KERNEL); + pcrat_image = kvmalloc(crat_table->length, GFP_KERNEL); + memcpy(pcrat_image, crat_table, crat_table->length); if (!pcrat_image) return -ENOMEM; @@ -809,11 +810,10 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size) /* Memory required to create Virtual CRAT. * Since there is no easy way to predict the amount of memory required, the - * following amount are allocated for CPU and GPU Virtual CRAT. This is + * following amount is allocated for GPU Virtual CRAT. This is * expected to cover all known conditions. But to be safe additional check * is put in the code to ensure we don't overwrite. */ -#define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE) #define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE) /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node @@ -964,7 +964,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size) #endif int ret = 0; - if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU) + if (!pcrat_image) return -EINVAL; /* Fill in CRAT Header. @@ -1364,30 +1364,37 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, uint32_t proximity_domain) { void *pcrat_image = NULL; - int ret = 0; + int ret = 0, num_nodes; + size_t dyn_size; if (!crat_image) return -EINVAL; *crat_image = NULL; - /* Allocate one VCRAT_SIZE_FOR_CPU for CPU virtual CRAT image and - * VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. This should cover - * all the current conditions. A check is put not to overwrite beyond - * allocated size + /* Allocate the CPU Virtual CRAT size based on the number of online + * nodes. Allocate VCRAT_SIZE_FOR_GPU for GPU virtual CRAT image. + * This should cover all the current conditions. A check is put not + * to overwrite beyond allocated size for GPUs */ switch (flags) { case COMPUTE_UNIT_CPU: - pcrat_image = kmalloc(VCRAT_SIZE_FOR_CPU, GFP_KERNEL); + num_nodes = num_online_nodes(); + dyn_size = sizeof(struct crat_header) + + num_nodes * (sizeof(struct crat_subtype_computeunit) + + sizeof(struct crat_subtype_memory) + + (num_nodes - 1) * sizeof(struct crat_subtype_iolink)); + pcrat_image = kvmalloc(dyn_size, GFP_KERNEL); if (!pcrat_image) return -ENOMEM; - *size = VCRAT_SIZE_FOR_CPU; + *size = dyn_size; + pr_debug("CRAT size is %ld", dyn_size); ret = kfd_create_vcrat_image_cpu(pcrat_image, size); break; case COMPUTE_UNIT_GPU: if (!kdev) return -EINVAL; - pcrat_image = kmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); + pcrat_image = kvmalloc(VCRAT_SIZE_FOR_GPU, GFP_KERNEL); if (!pcrat_image) return -ENOMEM; *size = VCRAT_SIZE_FOR_GPU; @@ -1406,7 +1413,7 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, if (!ret) *crat_image = pcrat_image; else - kfree(pcrat_image); + kvfree(pcrat_image); return ret; } @@ -1419,5 +1426,5 @@ int kfd_create_crat_image_virtual(void **crat_image, size_t *size, */ void kfd_destroy_crat_image(void *crat_image) { - kfree(crat_image); + kvfree(crat_image); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 27bcc5b472f6..b258a3dae767 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -45,7 +45,7 @@ static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev) } static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, - unsigned int pasid, uint64_t vmid0_address, + u32 pasid, uint64_t vmid0_address, uint32_t *packet_buff, size_t size_in_bytes) { struct pm4__release_mem *rm_packet; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h index a04a1fe1d0d9..f9c6df1fdc5c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h @@ -275,7 +275,7 @@ struct kfd_dbgdev { }; struct kfd_dbgmgr { - unsigned int pasid; + u32 pasid; struct kfd_dev *dev; struct kfd_dbgdev *dbgdev; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 0e71a0543f98..903170e59342 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -503,8 +503,8 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = { #ifdef KFD_SUPPORT_IOMMU_V2 [CHIP_KAVERI] = {&kaveri_device_info, NULL}, [CHIP_CARRIZO] = {&carrizo_device_info, NULL}, - [CHIP_RAVEN] = {&raven_device_info, NULL}, #endif + [CHIP_RAVEN] = {&raven_device_info, NULL}, [CHIP_HAWAII] = {&hawaii_device_info, NULL}, [CHIP_TONGA] = {&tonga_device_info, NULL}, [CHIP_FIJI] = {&fiji_device_info, &fiji_vf_device_info}, @@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, atomic_set(&kfd->sram_ecc_flag, 0); + ida_init(&kfd->doorbell_ida); + return kfd; } @@ -716,6 +718,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd->unique_id = amdgpu_amdkfd_get_unique_id(kfd->kgd); + kfd->noretry = amdgpu_amdkfd_get_noretry(kfd->kgd); + if (kfd_interrupt_init(kfd)) { dev_err(kfd_device, "Error initializing interrupts\n"); goto kfd_interrupt_error; @@ -798,6 +802,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_interrupt_exit(kfd); kfd_topology_remove_device(kfd); kfd_doorbell_fini(kfd); + ida_destroy(&kfd->doorbell_ida); kfd_gtt_sa_fini(kfd); amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); if (kfd->gws) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 560adc57a050..c0ae04a08625 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -40,7 +40,7 @@ #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, - unsigned int pasid, unsigned int vmid); + u32 pasid, unsigned int vmid); static int execute_queues_cpsch(struct device_queue_manager *dqm, enum kfd_unmap_queues_filter filter, @@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) } q->properties.doorbell_off = - kfd_get_doorbell_dw_offset_in_bar(dev, q->process, + kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd), q->doorbell_id); - return 0; } @@ -650,9 +649,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, goto out; pdd = qpd_to_pdd(qpd); - pr_info_ratelimited("Evicting PASID 0x%x queues\n", + pr_debug_ratelimited("Evicting PASID 0x%x queues\n", pdd->process->pasid); + pdd->last_evict_timestamp = get_jiffies_64(); /* Mark all queues as evicted. Deactivate all active queues on * the qpd. */ @@ -700,7 +700,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, goto out; pdd = qpd_to_pdd(qpd); - pr_info_ratelimited("Evicting PASID 0x%x queues\n", + pr_debug_ratelimited("Evicting PASID 0x%x queues\n", pdd->process->pasid); /* Mark all queues as evicted. Deactivate all active queues on @@ -714,6 +714,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, q->properties.is_active = false; decrement_queue_count(dqm, q->properties.type); } + pdd->last_evict_timestamp = get_jiffies_64(); retval = execute_queues_cpsch(dqm, qpd->is_debug ? KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : @@ -732,6 +733,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; struct kfd_process_device *pdd; uint64_t pd_base; + uint64_t eviction_duration; int retval, ret = 0; pdd = qpd_to_pdd(qpd); @@ -746,7 +748,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, goto out; } - pr_info_ratelimited("Restoring PASID 0x%x queues\n", + pr_debug_ratelimited("Restoring PASID 0x%x queues\n", pdd->process->pasid); /* Update PD Base in QPD */ @@ -799,6 +801,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, ret = retval; } qpd->evicted = 0; + eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; + atomic64_add(eviction_duration, &pdd->evict_duration_counter); out: if (mm) mmput(mm); @@ -812,6 +816,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, struct queue *q; struct kfd_process_device *pdd; uint64_t pd_base; + uint64_t eviction_duration; int retval = 0; pdd = qpd_to_pdd(qpd); @@ -826,7 +831,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, goto out; } - pr_info_ratelimited("Restoring PASID 0x%x queues\n", + pr_debug_ratelimited("Restoring PASID 0x%x queues\n", pdd->process->pasid); /* Update PD Base in QPD */ @@ -845,6 +850,8 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); qpd->evicted = 0; + eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; + atomic64_add(eviction_duration, &pdd->evict_duration_counter); out: dqm_unlock(dqm); return retval; @@ -924,7 +931,7 @@ out: } static int -set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, +set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, unsigned int vmid) { return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( @@ -1192,6 +1199,8 @@ static int stop_cpsch(struct device_queue_manager *dqm) dqm->sched_running = false; dqm_unlock(dqm); + pm_release_ib(&dqm->packets); + kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); pm_uninit(&dqm->packets, hanging); @@ -1302,7 +1311,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.is_active) { increment_queue_count(dqm, q->properties.type); - retval = execute_queues_cpsch(dqm, + execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } @@ -1955,8 +1964,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) kfree(dqm); } -int kfd_process_vm_fault(struct device_queue_manager *dqm, - unsigned int pasid) +int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid) { struct kfd_process_device *pdd; struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); @@ -1964,6 +1972,7 @@ int kfd_process_vm_fault(struct device_queue_manager *dqm, if (!p) return -EINVAL; + WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); pdd = kfd_get_process_device_data(dqm->dev, p); if (pdd) ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c index 72e4d61ac752..ad0593342333 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v10.c @@ -58,8 +58,9 @@ static int update_qpd_v10(struct device_queue_manager *dqm, /* check if sh_mem_config register already configured */ if (qpd->sh_mem_config == 0) { qpd->sh_mem_config = - SH_MEM_ALIGNMENT_MODE_UNALIGNED << - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + (SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | + (3 << SH_MEM_CONFIG__INITIAL_INST_PREFETCH__SHIFT); #if 0 /* TODO: * This shouldn't be an issue with Navi10. Verify. diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 309f63a0b34a..eca6331efa94 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -61,7 +61,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm, qpd->sh_mem_config = SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; - if (amdgpu_noretry && + if (dqm->dev->noretry && !dqm->dev->use_iommu_v2) qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index 8e0c00b9555e..768d153acff4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -31,9 +31,6 @@ * kernel queues using the first doorbell page reserved for the kernel. */ -static DEFINE_IDA(doorbell_ida); -static unsigned int max_doorbell_slices; - /* * Each device exposes a doorbell aperture, a PCI MMIO aperture that * receives 32-bit writes that are passed to queues as wptr values. @@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd) else return -ENOSPC; - if (!max_doorbell_slices || - doorbell_process_limit < max_doorbell_slices) - max_doorbell_slices = doorbell_process_limit; + if (!kfd->max_doorbell_slices || + doorbell_process_limit < kfd->max_doorbell_slices) + kfd->max_doorbell_slices = doorbell_process_limit; kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + doorbell_start_offset; @@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma) { phys_addr_t address; + struct kfd_process_device *pdd; /* * For simplicitly we only allow mapping of the entire doorbell @@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) return -EINVAL; - /* Calculate physical address of doorbell */ - address = kfd_get_process_doorbells(dev, process); + pdd = kfd_get_process_device_data(dev, process); + if (!pdd) + return -EINVAL; + /* Calculate physical address of doorbell */ + address = kfd_get_process_doorbells(pdd); vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; @@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) } unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, - struct kfd_process *process, + struct kfd_process_device *pdd, unsigned int doorbell_id) { /* @@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, * units regardless of the ASIC-dependent doorbell size. */ return kfd->doorbell_base_dw_offset + - process->doorbell_index + pdd->doorbell_index * kfd_doorbell_process_slice(kfd) / sizeof(u32) + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); } @@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd) } -phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, - struct kfd_process *process) +phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) { - return dev->doorbell_base + - process->doorbell_index * kfd_doorbell_process_slice(dev); + return pdd->dev->doorbell_base + + pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev); } -int kfd_alloc_process_doorbells(struct kfd_process *process) +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index) { - int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices, + int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices, GFP_KERNEL); if (r > 0) - process->doorbell_index = r; + *doorbell_index = r; return r; } -void kfd_free_process_doorbells(struct kfd_process *process) +void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index) { - if (process->doorbell_index) - ida_simple_remove(&doorbell_ida, process->doorbell_index); + if (doorbell_index) + ida_simple_remove(&kfd->doorbell_ida, doorbell_index); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index a9583b95fcc1..ba2c2ce0c55a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -460,7 +460,7 @@ static void set_event_from_interrupt(struct kfd_process *p, } } -void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, +void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, uint32_t valid_id_bits) { struct kfd_event *ev = NULL; @@ -872,7 +872,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, } #ifdef KFD_SUPPORT_IOMMU_V2 -void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, +void kfd_signal_iommu_event(struct kfd_dev *dev, u32 pasid, unsigned long address, bool is_write_requested, bool is_execute_requested) { @@ -950,7 +950,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, } #endif /* KFD_SUPPORT_IOMMU_V2 */ -void kfd_signal_hw_exception_event(unsigned int pasid) +void kfd_signal_hw_exception_event(u32 pasid) { /* * Because we are called from arbitrary context (workqueue) as opposed @@ -971,7 +971,7 @@ void kfd_signal_hw_exception_event(unsigned int pasid) kfd_unref_process(p); } -void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, +void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, struct kfd_vm_fault_info *info) { struct kfd_event *ev; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h index c7ac6c73af86..c8fe5dbdad55 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h @@ -79,7 +79,7 @@ struct kfd_event { #define KFD_EVENT_TYPE_DEBUG 5 #define KFD_EVENT_TYPE_MEMORY 8 -extern void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, - uint32_t valid_id_bits); +extern void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, + uint32_t valid_id_bits); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c index 5a64915abaf7..66bbca61e3ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c @@ -139,7 +139,7 @@ void kfd_iommu_unbind_process(struct kfd_process *p) } /* Callback for process shutdown invoked by the IOMMU driver */ -static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) +static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, u32 pasid) { struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); struct kfd_process *p; @@ -185,8 +185,8 @@ static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) } /* This function called by IOMMU driver on PPR failure */ -static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, - unsigned long address, u16 flags) +static int iommu_invalid_ppr_cb(struct pci_dev *pdev, u32 pasid, + unsigned long address, u16 flags) { struct kfd_dev *dev; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index f4b7f7e6c40e..5e90fe642192 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -70,6 +70,7 @@ err_create_wq: err_topology: kfd_chardev_exit(); err_ioctl: + pr_err("KFD is disabled due to module initialization failure\n"); return err; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c index 2a07c4f2cd0d..af5816f51e55 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -51,7 +51,7 @@ unsigned int kfd_get_pasid_limit(void) return 1U << pasid_bits; } -unsigned int kfd_pasid_alloc(void) +u32 kfd_pasid_alloc(void) { int r = amdgpu_pasid_alloc(pasid_bits); @@ -63,7 +63,7 @@ unsigned int kfd_pasid_alloc(void) return 0; } -void kfd_pasid_free(unsigned int pasid) +void kfd_pasid_free(u32 pasid) { amdgpu_pasid_free(pasid); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 023629f28495..c77cf23032ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -314,6 +314,11 @@ struct kfd_dev { spinlock_t smi_lock; uint32_t reset_seq_num; + + struct ida doorbell_ida; + unsigned int max_doorbell_slices; + + int noretry; }; enum kfd_mempool { @@ -631,7 +636,7 @@ enum kfd_pdd_bound { PDD_BOUND_SUSPENDED, }; -#define MAX_SYSFS_FILENAME_LEN 11 +#define MAX_SYSFS_FILENAME_LEN 15 /* * SDMA counter runs at 100MHz frequency. @@ -692,6 +697,39 @@ struct kfd_process_device { uint64_t sdma_past_activity_counter; struct attribute attr_sdma; char sdma_filename[MAX_SYSFS_FILENAME_LEN]; + + /* Eviction activity tracking */ + uint64_t last_evict_timestamp; + atomic64_t evict_duration_counter; + struct attribute attr_evict; + + struct kobject *kobj_stats; + unsigned int doorbell_index; + + /* + * @cu_occupancy: Reports occupancy of Compute Units (CU) of a process + * that is associated with device encoded by "this" struct instance. The + * value reflects CU usage by all of the waves launched by this process + * on this device. A very important property of occupancy parameter is + * that its value is a snapshot of current use. + * + * Following is to be noted regarding how this parameter is reported: + * + * The number of waves that a CU can launch is limited by couple of + * parameters. These are encoded by struct amdgpu_cu_info instance + * that is part of every device definition. For GFX9 devices this + * translates to 40 waves (simd_per_cu * max_waves_per_simd) when waves + * do not use scratch memory and 32 waves (max_scratch_slots_per_cu) + * when they do use scratch memory. This could change for future + * devices and therefore this example should be considered as a guide. + * + * All CU's of a device are available for the process. This may not be true + * under certain conditions - e.g. CU masking. + * + * Finally number of CU's that are occupied by a process is affected by both + * number of CU's a device has along with number of other competing processes + */ + struct attribute attr_cu_occupancy; }; #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) @@ -728,8 +766,7 @@ struct kfd_process { /* We want to receive a notification when the mm_struct is destroyed */ struct mmu_notifier mmu_notifier; - uint16_t pasid; - unsigned int doorbell_index; + u32 pasid; /* * List of kfd_process_device structures, @@ -805,7 +842,7 @@ int kfd_process_create_wq(void); void kfd_process_destroy_wq(void); struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); -struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); +struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); void kfd_unref_process(struct kfd_process *p); int kfd_process_evict_queues(struct kfd_process *p); @@ -846,8 +883,8 @@ int kfd_pasid_init(void); void kfd_pasid_exit(void); bool kfd_set_pasid_limit(unsigned int new_limit); unsigned int kfd_get_pasid_limit(void); -unsigned int kfd_pasid_alloc(void); -void kfd_pasid_free(unsigned int pasid); +u32 kfd_pasid_alloc(void); +void kfd_pasid_free(u32 pasid); /* Doorbells */ size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); @@ -862,13 +899,13 @@ u32 read_kernel_doorbell(u32 __iomem *db); void write_kernel_doorbell(void __iomem *db, u32 value); void write_kernel_doorbell64(void __iomem *db, u64 value); unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, - struct kfd_process *process, + struct kfd_process_device *pdd, unsigned int doorbell_id); -phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, - struct kfd_process *process); -int kfd_alloc_process_doorbells(struct kfd_process *process); -void kfd_free_process_doorbells(struct kfd_process *process); - +phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd); +int kfd_alloc_process_doorbells(struct kfd_dev *kfd, + unsigned int *doorbell_index); +void kfd_free_process_doorbells(struct kfd_dev *kfd, + unsigned int doorbell_index); /* GTT Sub-Allocator */ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, @@ -933,7 +970,7 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, enum kfd_queue_type type); void kernel_queue_uninit(struct kernel_queue *kq, bool hanging); -int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid); +int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid); /* Process Queue Manager */ struct process_queue_node { @@ -1055,12 +1092,12 @@ int kfd_wait_on_events(struct kfd_process *p, uint32_t num_events, void __user *data, bool all, uint32_t user_timeout_ms, uint32_t *wait_result); -void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, +void kfd_signal_event_interrupt(u32 pasid, uint32_t partial_id, uint32_t valid_id_bits); void kfd_signal_iommu_event(struct kfd_dev *dev, - unsigned int pasid, unsigned long address, - bool is_write_requested, bool is_execute_requested); -void kfd_signal_hw_exception_event(unsigned int pasid); + u32 pasid, unsigned long address, + bool is_write_requested, bool is_execute_requested); +void kfd_signal_hw_exception_event(u32 pasid); int kfd_set_event(struct kfd_process *p, uint32_t event_id); int kfd_reset_event(struct kfd_process *p, uint32_t event_id); int kfd_event_page_set(struct kfd_process *p, void *kernel_address, @@ -1071,7 +1108,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, uint64_t *event_page_offset, uint32_t *event_slot_index); int kfd_event_destroy(struct kfd_process *p, uint32_t event_id); -void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, +void kfd_signal_vm_fault_event(struct kfd_dev *dev, u32 pasid, struct kfd_vm_fault_info *info); void kfd_signal_reset_event(struct kfd_dev *dev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a0e12a79ab7d..65803e153a22 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -249,6 +249,52 @@ cleanup: } } +/** + * @kfd_get_cu_occupancy() - Collect number of waves in-flight on this device + * by current process. Translates acquired wave count into number of compute units + * that are occupied. + * + * @atr: Handle of attribute that allows reporting of wave count. The attribute + * handle encapsulates GPU device it is associated with, thereby allowing collection + * of waves in flight, etc + * + * @buffer: Handle of user provided buffer updated with wave count + * + * Return: Number of bytes written to user buffer or an error value + */ +static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) +{ + int cu_cnt; + int wave_cnt; + int max_waves_per_cu; + struct kfd_dev *dev = NULL; + struct kfd_process *proc = NULL; + struct kfd_process_device *pdd = NULL; + + pdd = container_of(attr, struct kfd_process_device, attr_cu_occupancy); + dev = pdd->dev; + if (dev->kfd2kgd->get_cu_occupancy == NULL) + return -EINVAL; + + cu_cnt = 0; + proc = pdd->process; + if (pdd->qpd.queue_count == 0) { + pr_debug("Gpu-Id: %d has no active queues for process %d\n", + dev->id, proc->pasid); + return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); + } + + /* Collect wave count from device if it supports */ + wave_cnt = 0; + max_waves_per_cu = 0; + dev->kfd2kgd->get_cu_occupancy(dev->kgd, proc->pasid, &wave_cnt, + &max_waves_per_cu); + + /* Translate wave count to number of compute units */ + cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; + return snprintf(buffer, PAGE_SIZE, "%d\n", cu_cnt); +} + static ssize_t kfd_procfs_show(struct kobject *kobj, struct attribute *attr, char *buffer) { @@ -345,6 +391,32 @@ static ssize_t kfd_procfs_queue_show(struct kobject *kobj, return 0; } +static ssize_t kfd_procfs_stats_show(struct kobject *kobj, + struct attribute *attr, char *buffer) +{ + if (strcmp(attr->name, "evicted_ms") == 0) { + struct kfd_process_device *pdd = container_of(attr, + struct kfd_process_device, + attr_evict); + uint64_t evict_jiffies; + + evict_jiffies = atomic64_read(&pdd->evict_duration_counter); + + return snprintf(buffer, + PAGE_SIZE, + "%llu\n", + jiffies64_to_msecs(evict_jiffies)); + + /* Sysfs handle that gets CU occupancy is per device */ + } else if (strcmp(attr->name, "cu_occupancy") == 0) { + return kfd_get_cu_occupancy(attr, buffer); + } else { + pr_err("Invalid attribute"); + } + + return 0; +} + static struct attribute attr_queue_size = { .name = "size", .mode = KFD_SYSFS_FILE_MODE @@ -376,6 +448,19 @@ static struct kobj_type procfs_queue_type = { .default_attrs = procfs_queue_attrs, }; +static const struct sysfs_ops procfs_stats_ops = { + .show = kfd_procfs_stats_show, +}; + +static struct attribute *procfs_stats_attrs[] = { + NULL +}; + +static struct kobj_type procfs_stats_type = { + .sysfs_ops = &procfs_stats_ops, + .default_attrs = procfs_stats_attrs, +}; + int kfd_procfs_add_queue(struct queue *q) { struct kfd_process *proc; @@ -417,6 +502,72 @@ static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr, return ret; } +static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) +{ + int ret = 0; + struct kfd_process_device *pdd; + char stats_dir_filename[MAX_SYSFS_FILENAME_LEN]; + + if (!p) + return -EINVAL; + + if (!p->kobj) + return -EFAULT; + + /* + * Create sysfs files for each GPU: + * - proc/<pid>/stats_<gpuid>/ + * - proc/<pid>/stats_<gpuid>/evicted_ms + * - proc/<pid>/stats_<gpuid>/cu_occupancy + */ + list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + struct kobject *kobj_stats; + + snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN, + "stats_%u", pdd->dev->id); + kobj_stats = kfd_alloc_struct(kobj_stats); + if (!kobj_stats) + return -ENOMEM; + + ret = kobject_init_and_add(kobj_stats, + &procfs_stats_type, + p->kobj, + stats_dir_filename); + + if (ret) { + pr_warn("Creating KFD proc/stats_%s folder failed", + stats_dir_filename); + kobject_put(kobj_stats); + goto err; + } + + pdd->kobj_stats = kobj_stats; + pdd->attr_evict.name = "evicted_ms"; + pdd->attr_evict.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&pdd->attr_evict); + ret = sysfs_create_file(kobj_stats, &pdd->attr_evict); + if (ret) + pr_warn("Creating eviction stats for gpuid %d failed", + (int)pdd->dev->id); + + /* Add sysfs file to report compute unit occupancy */ + if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) { + pdd->attr_cu_occupancy.name = "cu_occupancy"; + pdd->attr_cu_occupancy.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&pdd->attr_cu_occupancy); + ret = sysfs_create_file(kobj_stats, + &pdd->attr_cu_occupancy); + if (ret) + pr_warn("Creating %s failed for gpuid: %d", + pdd->attr_cu_occupancy.name, + (int)pdd->dev->id); + } + } +err: + return ret; +} + + static int kfd_procfs_add_sysfs_files(struct kfd_process *p) { int ret = 0; @@ -452,7 +603,6 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p) return ret; } - void kfd_procfs_del_queue(struct queue *q) { if (!q) @@ -660,6 +810,11 @@ struct kfd_process *kfd_create_process(struct file *filep) if (!process->kobj_queues) pr_warn("Creating KFD proc/queues folder failed"); + ret = kfd_procfs_add_sysfs_stats(process); + if (ret) + pr_warn("Creating sysfs stats dir for pid %d failed", + (int)process->lead_thread->pid); + ret = kfd_procfs_add_sysfs_files(process); if (ret) pr_warn("Creating sysfs usage file for pid %d failed", @@ -781,6 +936,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) kfree(pdd->qpd.doorbell_bitmap); idr_destroy(&pdd->alloc_idr); + kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index); + /* * before destroying pdd, make sure to report availability * for auto suspend @@ -816,6 +973,12 @@ static void kfd_process_wq_release(struct work_struct *work) list_for_each_entry(pdd, &p->per_device_data, per_device_list) { sysfs_remove_file(p->kobj, &pdd->attr_vram); sysfs_remove_file(p->kobj, &pdd->attr_sdma); + sysfs_remove_file(p->kobj, &pdd->attr_evict); + if (pdd->dev->kfd2kgd->get_cu_occupancy != NULL) + sysfs_remove_file(p->kobj, &pdd->attr_cu_occupancy); + kobject_del(pdd->kobj_stats); + kobject_put(pdd->kobj_stats); + pdd->kobj_stats = NULL; } kobject_del(p->kobj); @@ -833,8 +996,6 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_event_free_process(p); kfd_pasid_free(p->pasid); - kfd_free_process_doorbells(p); - mutex_destroy(&p->mutex); put_task_struct(p->lead_thread); @@ -1012,9 +1173,6 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (process->pasid == 0) goto err_alloc_pasid; - if (kfd_alloc_process_doorbells(process) < 0) - goto err_alloc_doorbells; - err = pqm_init(&process->pqm, process); if (err != 0) goto err_process_pqm_init; @@ -1042,8 +1200,6 @@ err_register_notifier: err_init_apertures: pqm_uninit(&process->pqm); err_process_pqm_init: - kfd_free_process_doorbells(process); -err_alloc_doorbells: kfd_pasid_free(process->pasid); err_alloc_pasid: mutex_destroy(&process->mutex); @@ -1106,10 +1262,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, if (!pdd) return NULL; + if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) { + pr_err("Failed to alloc doorbell for pdd\n"); + goto err_free_pdd; + } + if (init_doorbell_bitmap(&pdd->qpd, dev)) { pr_err("Failed to init doorbell for process\n"); - kfree(pdd); - return NULL; + goto err_free_pdd; } pdd->dev = dev; @@ -1125,12 +1285,17 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, pdd->runtime_inuse = false; pdd->vram_usage = 0; pdd->sdma_past_activity_counter = 0; + atomic64_set(&pdd->evict_duration_counter, 0); list_add(&pdd->per_device_list, &p->per_device_data); /* Init idr used for memory handle translation */ idr_init(&pdd->alloc_idr); return pdd; + +err_free_pdd: + kfree(pdd); + return NULL; } /** @@ -1307,7 +1472,7 @@ void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd, } /* This increments the process->ref counter. */ -struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid) +struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid) { struct kfd_process *p, *ret_p = NULL; unsigned int temp; @@ -1488,6 +1653,7 @@ void kfd_suspend_all_processes(void) unsigned int temp; int idx = srcu_read_lock(&kfd_processes_srcu); + WARN(debug_evictions, "Evicting all processes"); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); |
