Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 444
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 53
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 145
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c | 95
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.c | 97
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2
-rw-r--r--  drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c | 11
-rw-r--r--  drivers/gpu/drm/bridge/synopsys/dw-hdmi.c | 41
-rw-r--r--  drivers/gpu/drm/bridge/ti-tfp410.c | 4
-rw-r--r--  drivers/gpu/drm/drm_connector.c | 2
-rw-r--r--  drivers/gpu/drm/drm_property.c | 2
-rw-r--r--  drivers/gpu/drm/i915/Kconfig.debug | 2
-rw-r--r--  drivers/gpu/drm/i915/display/intel_display_power.c | 3
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 4
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pm.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_engine_pool.c | 2
-rw-r--r--  drivers/gpu/drm/i915/gt/intel_mocs.c | 10
-rw-r--r--  drivers/gpu/drm/i915/gvt/dmabuf.c | 4
-rw-r--r--  drivers/gpu/drm/i915/i915_active.c | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c | 3
-rw-r--r--  drivers/gpu/drm/i915/i915_request.c | 2
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_svm.c | 230
-rw-r--r--  drivers/gpu/drm/radeon/cik.c | 94
-rw-r--r--  drivers/gpu/drm/radeon/radeon.h | 9
-rw-r--r--  drivers/gpu/drm/radeon/radeon_mn.c | 218
-rw-r--r--  drivers/gpu/drm/radeon/si.c | 97
-rw-r--r--  drivers/gpu/drm/sun4i/sun4i_tcon.c | 2
-rw-r--r--  drivers/gpu/drm/ttm/ttm_bo_vm.c | 174
-rw-r--r--  drivers/gpu/drm/vmwgfx/Kconfig | 1
-rw-r--r--  drivers/gpu/drm/vmwgfx/Makefile | 2
-rw-r--r--  drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h | 233
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 10
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 44
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c | 1
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 488
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 193
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 13
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 395
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 15
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 74
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 16
47 files changed, 2215 insertions, 1065 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index bcc5d40a8d5f..0c229a92a24b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -967,6 +967,8 @@ struct amdgpu_device {
struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index;
+ struct mutex notifier_lock;
+
int asic_reset_res;
struct work_struct xgmi_reset_work;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 12dbcfaa34b8..888209eb8cec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -518,8 +518,7 @@ static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
*
* Returns 0 for success, negative errno for errors.
*/
-static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
- uint64_t user_addr)
+static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr)
{
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
@@ -1212,7 +1211,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
if (user_addr) {
- ret = init_user_pages(*mem, current->mm, user_addr);
+ ret = init_user_pages(*mem, user_addr);
if (ret)
goto allocate_init_user_pages_failed;
}
@@ -1757,6 +1756,10 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
return ret;
}
+ /*
+ * FIXME: Cannot ignore the return code, must hold
+ * notifier_lock
+ */
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
/* Mark the BO as valid unless it was invalidated
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a169ff16277f..5ca905b4a0fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -538,8 +538,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
e->tv.num_shared = 2;
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
- if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@@ -1219,11 +1217,11 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
- /* No memory allocation is allowed while holding the mn lock.
- * p->mn is hold until amdgpu_cs_submit is finished and fence is added
- * to BOs.
+ /* No memory allocation is allowed while holding the notifier lock.
+ * The lock is held until amdgpu_cs_submit is finished and fence is
+ * added to BOs.
*/
- amdgpu_mn_lock(p->mn);
+ mutex_lock(&p->adev->notifier_lock);
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
@@ -1266,13 +1264,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
return 0;
error_abort:
drm_sched_job_cleanup(&job->base);
- amdgpu_mn_unlock(p->mn);
+ mutex_unlock(&p->adev->notifier_lock);
error_unlock:
amdgpu_job_free(job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 4f76beafb2fa..c17505fba988 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2794,6 +2794,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->virt.vf_errors.lock);
hash_init(adev->mn_hash);
mutex_init(&adev->lock_reset);
+ mutex_init(&adev->notifier_lock);
mutex_init(&adev->virt.dpm_mutex);
mutex_init(&adev->psp.mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 392300f77b13..828b5167ff12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -51,439 +51,107 @@
#include "amdgpu_amdkfd.h"
/**
- * struct amdgpu_mn_node
+ * amdgpu_mn_invalidate_gfx - callback to notify about mm change
*
- * @it: interval node defining start-last of the affected address range
- * @bos: list of all BOs in the affected address range
- *
- * Manages all BOs which are affected of a certain range of address space.
- */
-struct amdgpu_mn_node {
- struct interval_tree_node it;
- struct list_head bos;
-};
-
-/**
- * amdgpu_mn_destroy - destroy the HMM mirror
- *
- * @work: previously sheduled work item
- *
- * Lazy destroys the notifier from a work item
- */
-static void amdgpu_mn_destroy(struct work_struct *work)
-{
- struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
- struct amdgpu_device *adev = amn->adev;
- struct amdgpu_mn_node *node, *next_node;
- struct amdgpu_bo *bo, *next_bo;
-
- mutex_lock(&adev->mn_lock);
- down_write(&amn->lock);
- hash_del(&amn->node);
- rbtree_postorder_for_each_entry_safe(node, next_node,
- &amn->objects.rb_root, it.rb) {
- list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
- }
- kfree(node);
- }
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-
- hmm_mirror_unregister(&amn->mirror);
- kfree(amn);
-}
-
-/**
- * amdgpu_hmm_mirror_release - callback to notify about mm destruction
- *
- * @mirror: the HMM mirror (mm) this callback is about
- *
- * Shedule a work item to lazy destroy HMM mirror.
- */
-static void amdgpu_hmm_mirror_release(struct hmm_mirror *mirror)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
-
- INIT_WORK(&amn->work, amdgpu_mn_destroy);
- schedule_work(&amn->work);
-}
-
-/**
- * amdgpu_mn_lock - take the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_lock(struct amdgpu_mn *mn)
-{
- if (mn)
- down_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_unlock - drop the write side lock for this notifier
- *
- * @mn: our notifier
- */
-void amdgpu_mn_unlock(struct amdgpu_mn *mn)
-{
- if (mn)
- up_write(&mn->lock);
-}
-
-/**
- * amdgpu_mn_read_lock - take the read side lock for this notifier
- *
- * @amn: our notifier
- * @blockable: is the notifier blockable
- */
-static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
-{
- if (blockable)
- down_read(&amn->lock);
- else if (!down_read_trylock(&amn->lock))
- return -EAGAIN;
-
- return 0;
-}
-
-/**
- * amdgpu_mn_read_unlock - drop the read side lock for this notifier
- *
- * @amn: our notifier
- */
-static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
-{
- up_read(&amn->lock);
-}
-
-/**
- * amdgpu_mn_invalidate_node - unmap all BOs of a node
- *
- * @node: the node with the BOs to unmap
- * @start: start of address range affected
- * @end: end of address range affected
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
-static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
- unsigned long start,
- unsigned long end)
+static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
- list_for_each_entry(bo, &node->bos, mn_list) {
-
- if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end))
- continue;
-
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
- true, false, MAX_SCHEDULE_TIMEOUT);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- }
-}
-
-/**
- * amdgpu_mn_sync_pagetables_gfx - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * Block for operations on BOs to finish and mark pages as accessed and
- * potentially dirty.
- */
-static int
-amdgpu_mn_sync_pagetables_gfx(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = mmu_notifier_range_blockable(update);
- struct interval_tree_node *it;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
+ mutex_lock(&adev->notifier_lock);
- /* TODO we should be able to split locking for interval tree and
- * amdgpu_mn_invalidate_node
- */
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
+ mmu_interval_set_seq(mni, cur_seq);
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- amdgpu_mn_invalidate_node(node, start, end);
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
-}
-
-/**
- * amdgpu_mn_sync_pagetables_hsa - callback to notify about mm change
- *
- * @mirror: the hmm_mirror (mm) is about to update
- * @update: the update start, end address
- *
- * We temporarily evict all BOs between start and end. This
- * necessitates evicting all user-mode queues of the process. The BOs
- * are restorted in amdgpu_mn_invalidate_range_end_hsa.
- */
-static int
-amdgpu_mn_sync_pagetables_hsa(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
-{
- struct amdgpu_mn *amn = container_of(mirror, struct amdgpu_mn, mirror);
- unsigned long start = update->start;
- unsigned long end = update->end;
- bool blockable = mmu_notifier_range_blockable(update);
- struct interval_tree_node *it;
-
- /* notification is exclusive, but interval is inclusive */
- end -= 1;
-
- if (amdgpu_mn_read_lock(amn, blockable))
- return -EAGAIN;
-
- it = interval_tree_iter_first(&amn->objects, start, end);
- while (it) {
- struct amdgpu_mn_node *node;
- struct amdgpu_bo *bo;
-
- if (!blockable) {
- amdgpu_mn_read_unlock(amn);
- return -EAGAIN;
- }
-
- node = container_of(it, struct amdgpu_mn_node, it);
- it = interval_tree_iter_next(it, start, end);
-
- list_for_each_entry(bo, &node->bos, mn_list) {
- struct kgd_mem *mem = bo->kfd_bo;
-
- if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
- start, end))
- amdgpu_amdkfd_evict_userptr(mem, amn->mm);
- }
- }
-
- amdgpu_mn_read_unlock(amn);
-
- return 0;
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ mutex_unlock(&adev->notifier_lock);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
+ return true;
}
-/* Low bits of any reasonable mm pointer will be unused due to struct
- * alignment. Use these bits to make a unique key from the mm pointer
- * and notifier type.
- */
-#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
-
-static struct hmm_mirror_ops amdgpu_hmm_mirror_ops[] = {
- [AMDGPU_MN_TYPE_GFX] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_gfx,
- .release = amdgpu_hmm_mirror_release
- },
- [AMDGPU_MN_TYPE_HSA] = {
- .sync_cpu_device_pagetables = amdgpu_mn_sync_pagetables_hsa,
- .release = amdgpu_hmm_mirror_release
- },
+static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
+ .invalidate = amdgpu_mn_invalidate_gfx,
};
/**
- * amdgpu_mn_get - create HMM mirror context
+ * amdgpu_mn_invalidate_hsa - callback to notify about mm change
*
- * @adev: amdgpu device pointer
- * @type: type of MMU notifier context
+ * @mni: the range (mm) is about to update
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
- * Creates a HMM mirror context for current->mm.
+ * We temporarily evict the BO attached to this range. This necessitates
+ * evicting all user-mode queues of the process.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
+static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct mm_struct *mm = current->mm;
- struct amdgpu_mn *amn;
- unsigned long key = AMDGPU_MN_KEY(mm, type);
- int r;
-
- mutex_lock(&adev->mn_lock);
- if (down_write_killable(&mm->mmap_sem)) {
- mutex_unlock(&adev->mn_lock);
- return ERR_PTR(-EINTR);
- }
-
- hash_for_each_possible(adev->mn_hash, amn, node, key)
- if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
- goto release_locks;
-
- amn = kzalloc(sizeof(*amn), GFP_KERNEL);
- if (!amn) {
- amn = ERR_PTR(-ENOMEM);
- goto release_locks;
- }
-
- amn->adev = adev;
- amn->mm = mm;
- init_rwsem(&amn->lock);
- amn->type = type;
- amn->objects = RB_ROOT_CACHED;
-
- amn->mirror.ops = &amdgpu_hmm_mirror_ops[type];
- r = hmm_mirror_register(&amn->mirror, mm);
- if (r)
- goto free_amn;
+ struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
+ if (!mmu_notifier_range_blockable(range))
+ return false;
-release_locks:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
+ mutex_lock(&adev->notifier_lock);
- return amn;
+ mmu_interval_set_seq(mni, cur_seq);
-free_amn:
- up_write(&mm->mmap_sem);
- mutex_unlock(&adev->mn_lock);
- kfree(amn);
+ amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
+ mutex_unlock(&adev->notifier_lock);
- return ERR_PTR(r);
+ return true;
}
+static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
+ .invalidate = amdgpu_mn_invalidate_hsa,
+};
+
/**
* amdgpu_mn_register - register a BO for notifier updates
*
* @bo: amdgpu buffer object
* @addr: userptr addr we should monitor
*
- * Registers an HMM mirror for the given BO at the specified address.
+ * Registers a mmu_notifier for the given BO at the specified address.
* Returns 0 on success, -ERRNO if anything goes wrong.
*/
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
- unsigned long end = addr + amdgpu_bo_size(bo) - 1;
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- enum amdgpu_mn_type type =
- bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
- struct amdgpu_mn *amn;
- struct amdgpu_mn_node *node = NULL, *new_node;
- struct list_head bos;
- struct interval_tree_node *it;
-
- amn = amdgpu_mn_get(adev, type);
- if (IS_ERR(amn))
- return PTR_ERR(amn);
-
- new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
- if (!new_node)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&bos);
-
- down_write(&amn->lock);
-
- while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
- kfree(node);
- node = container_of(it, struct amdgpu_mn_node, it);
- interval_tree_remove(&node->it, &amn->objects);
- addr = min(it->start, addr);
- end = max(it->last, end);
- list_splice(&node->bos, &bos);
- }
-
- if (!node)
- node = new_node;
- else
- kfree(new_node);
-
- bo->mn = amn;
-
- node->it.start = addr;
- node->it.last = end;
- INIT_LIST_HEAD(&node->bos);
- list_splice(&bos, &node->bos);
- list_add(&bo->mn_list, &node->bos);
-
- interval_tree_insert(&node->it, &amn->objects);
-
- up_write(&amn->lock);
-
- return 0;
+ if (bo->kfd_bo)
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm,
+ addr, amdgpu_bo_size(bo),
+ &amdgpu_mn_hsa_ops);
+ return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ amdgpu_bo_size(bo),
+ &amdgpu_mn_gfx_ops);
}
/**
- * amdgpu_mn_unregister - unregister a BO for HMM mirror updates
+ * amdgpu_mn_unregister - unregister a BO for notifier updates
*
* @bo: amdgpu buffer object
*
- * Remove any registration of HMM mirror updates from the buffer object.
+ * Remove any registration of mmu notifier updates from the buffer object.
*/
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- struct amdgpu_mn *amn;
- struct list_head *head;
-
- mutex_lock(&adev->mn_lock);
-
- amn = bo->mn;
- if (amn == NULL) {
- mutex_unlock(&adev->mn_lock);
+ if (!bo->notifier.mm)
return;
- }
-
- down_write(&amn->lock);
-
- /* save the next list entry for later */
- head = bo->mn_list.next;
-
- bo->mn = NULL;
- list_del_init(&bo->mn_list);
-
- if (list_empty(head)) {
- struct amdgpu_mn_node *node;
-
- node = container_of(head, struct amdgpu_mn_node, bos);
- interval_tree_remove(&node->it, &amn->objects);
- kfree(node);
- }
-
- up_write(&amn->lock);
- mutex_unlock(&adev->mn_lock);
-}
-
-/* flags used by HMM internal, not related to CPU/GPU PTE flags */
-static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
- (1 << 0), /* HMM_PFN_VALID */
- (1 << 1), /* HMM_PFN_WRITE */
- 0 /* HMM_PFN_DEVICE_PRIVATE */
-};
-
-static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
- 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
- 0, /* HMM_PFN_NONE */
- 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
-};
-
-void amdgpu_hmm_init_range(struct hmm_range *range)
-{
- if (range) {
- range->flags = hmm_range_flags;
- range->values = hmm_range_values;
- range->pfn_shift = PAGE_SHIFT;
- }
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
}
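
The rewritten amdgpu_mn.c above boils down to one mmu_interval_notifier per buffer object. As a rough, driver-agnostic sketch of that pattern (struct my_bo, my_invalidate() and driver_notifier_lock are illustrative placeholders; the mmu_interval_* calls are the real API used above):

    /* Invalidation callback: runs whenever the CPU page tables change
     * anywhere under the registered range.
     */
    static bool my_invalidate(struct mmu_interval_notifier *mni,
                              const struct mmu_notifier_range *range,
                              unsigned long cur_seq)
    {
            struct my_bo *bo = container_of(mni, struct my_bo, notifier);

            if (!mmu_notifier_range_blockable(range))
                    return false;   /* non-blocking context: caller retries */

            mutex_lock(&driver_notifier_lock);
            mmu_interval_set_seq(mni, cur_seq);     /* bump the collision sequence */
            /* block or fence off device access to bo's user pages here */
            mutex_unlock(&driver_notifier_lock);
            return true;
    }

    static const struct mmu_interval_notifier_ops my_ops = {
            .invalidate = my_invalidate,
    };

    /* registration covers exactly the BO's userptr range ... */
    ret = mmu_interval_notifier_insert(&bo->notifier, current->mm,
                                       addr, size, &my_ops);
    /* ... and teardown is a single call */
    mmu_interval_notifier_remove(&bo->notifier);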
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index b8ed68943625..a292238f75eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,63 +30,10 @@
#include <linux/workqueue.h>
#include <linux/interval_tree.h>
-enum amdgpu_mn_type {
- AMDGPU_MN_TYPE_GFX,
- AMDGPU_MN_TYPE_HSA,
-};
-
-/**
- * struct amdgpu_mn
- *
- * @adev: amdgpu device pointer
- * @mm: process address space
- * @type: type of MMU notifier
- * @work: destruction work item
- * @node: hash table node to find structure by adev and mn
- * @lock: rw semaphore protecting the notifier nodes
- * @objects: interval tree containing amdgpu_mn_nodes
- * @mirror: HMM mirror function support
- *
- * Data for each amdgpu device and process address space.
- */
-struct amdgpu_mn {
- /* constant after initialisation */
- struct amdgpu_device *adev;
- struct mm_struct *mm;
- enum amdgpu_mn_type type;
-
- /* only used on destruction */
- struct work_struct work;
-
- /* protected by adev->mn_lock */
- struct hlist_node node;
-
- /* objects protected by lock */
- struct rw_semaphore lock;
- struct rb_root_cached objects;
-
-#ifdef CONFIG_HMM_MIRROR
- /* HMM mirror */
- struct hmm_mirror mirror;
-#endif
-};
-
#if defined(CONFIG_HMM_MIRROR)
-void amdgpu_mn_lock(struct amdgpu_mn *mn);
-void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
-void amdgpu_hmm_init_range(struct hmm_range *range);
#else
-static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
-static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
- enum amdgpu_mn_type type)
-{
- return NULL;
-}
static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 7e99f6c58c48..36dec51d1ef1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -30,6 +30,9 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
@@ -101,10 +104,12 @@ struct amdgpu_bo {
struct ttm_bo_kmap_obj dma_buf_vmap;
struct amdgpu_mn *mn;
- union {
- struct list_head mn_list;
- struct list_head shadow_list;
- };
+
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
+
+ struct list_head shadow_list;
struct kgd_mem *kfd_bo;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 61d9b7774d42..2616e2eafdeb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -35,6 +35,7 @@
#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
+#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/swap.h>
@@ -769,6 +770,20 @@ struct amdgpu_ttm_tt {
#endif
};
+#ifdef CONFIG_DRM_AMDGPU_USERPTR
+/* flags used by HMM internal, not related to CPU/GPU PTE flags */
+static const uint64_t hmm_range_flags[HMM_PFN_FLAG_MAX] = {
+ (1 << 0), /* HMM_PFN_VALID */
+ (1 << 1), /* HMM_PFN_WRITE */
+ 0 /* HMM_PFN_DEVICE_PRIVATE */
+};
+
+static const uint64_t hmm_range_values[HMM_PFN_VALUE_MAX] = {
+ 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
+ 0, /* HMM_PFN_NONE */
+ 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
+};
+
/**
* amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user
* memory and start HMM tracking CPU page table update
@@ -776,85 +791,89 @@ struct amdgpu_ttm_tt {
* Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
* once afterwards to stop HMM tracking
*/
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-
-#define MAX_RETRY_HMM_RANGE_FAULT 16
-
int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
{
- struct hmm_mirror *mirror = bo->mn ? &bo->mn->mirror : NULL;
struct ttm_tt *ttm = bo->tbo.ttm;
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- struct mm_struct *mm = gtt->usertask->mm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
struct hmm_range *range;
+ unsigned long timeout;
+ struct mm_struct *mm;
unsigned long i;
- uint64_t *pfns;
int r = 0;
- if (!mm) /* Happens during process shutdown */
- return -ESRCH;
-
- if (unlikely(!mirror)) {
- DRM_DEBUG_DRIVER("Failed to get hmm_mirror\n");
- r = -EFAULT;
- goto out;
+ mm = bo->notifier.mm;
+ if (unlikely(!mm)) {
+ DRM_DEBUG_DRIVER("BO is not registered?\n");
+ return -EFAULT;
}
- vma = find_vma(mm, start);
- if (unlikely(!vma || start < vma->vm_start)) {
- r = -EFAULT;
- goto out;
- }
- if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
- vma->vm_file)) {
- r = -EPERM;
- goto out;
- }
+ /* Another get_user_pages is running at the same time?? */
+ if (WARN_ON(gtt->range))
+ return -EFAULT;
+
+ if (!mmget_not_zero(mm)) /* Happens during process shutdown */
+ return -ESRCH;
range = kzalloc(sizeof(*range), GFP_KERNEL);
if (unlikely(!range)) {
r = -ENOMEM;
goto out;
}
+ range->notifier = &bo->notifier;
+ range->flags = hmm_range_flags;
+ range->values = hmm_range_values;
+ range->pfn_shift = PAGE_SHIFT;
+ range->start = bo->notifier.interval_tree.start;
+ range->end = bo->notifier.interval_tree.last + 1;
+ range->default_flags = hmm_range_flags[HMM_PFN_VALID];
+ if (!amdgpu_ttm_tt_is_readonly(ttm))
+ range->default_flags |= range->flags[HMM_PFN_WRITE];
- pfns = kvmalloc_array(ttm->num_pages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns)) {
+ range->pfns = kvmalloc_array(ttm->num_pages, sizeof(*range->pfns),
+ GFP_KERNEL);
+ if (unlikely(!range->pfns)) {
r = -ENOMEM;
goto out_free_ranges;
}
- amdgpu_hmm_init_range(range);
- range->default_flags = range->flags[HMM_PFN_VALID];
- range->default_flags |= amdgpu_ttm_tt_is_readonly(ttm) ?
- 0 : range->flags[HMM_PFN_WRITE];
- range->pfn_flags_mask = 0;
- range->pfns = pfns;
- range->start = start;
- range->end = start + ttm->num_pages * PAGE_SIZE;
-
- hmm_range_register(range, mirror);
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (unlikely(!vma || start < vma->vm_start)) {
+ r = -EFAULT;
+ goto out_unlock;
+ }
+ if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
+ vma->vm_file)) {
+ r = -EPERM;
+ goto out_unlock;
+ }
+ up_read(&mm->mmap_sem);
+ timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- /*
- * Just wait for range to be valid, safe to ignore return value as we
- * will use the return value of hmm_range_fault() below under the
- * mmap_sem to ascertain the validity of the range.
- */
- hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT);
+retry:
+ range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
down_read(&mm->mmap_sem);
r = hmm_range_fault(range, 0);
up_read(&mm->mmap_sem);
-
- if (unlikely(r < 0))
+ if (unlikely(r <= 0)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if ((r == 0 || r == -EBUSY) && !time_after(jiffies, timeout))
+ goto retry;
goto out_free_pfns;
+ }
for (i = 0; i < ttm->num_pages; i++) {
- pages[i] = hmm_device_entry_to_page(range, pfns[i]);
+ /* FIXME: The pages cannot be touched outside the notifier_lock */
+ pages[i] = hmm_device_entry_to_page(range, range->pfns[i]);
if (unlikely(!pages[i])) {
pr_err("Page fault failed for pfn[%lu] = 0x%llx\n",
- i, pfns[i]);
+ i, range->pfns[i]);
r = -ENOMEM;
goto out_free_pfns;
@@ -862,15 +881,18 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
}
gtt->range = range;
+ mmput(mm);
return 0;
+out_unlock:
+ up_read(&mm->mmap_sem);
out_free_pfns:
- hmm_range_unregister(range);
- kvfree(pfns);
+ kvfree(range->pfns);
out_free_ranges:
kfree(range);
out:
+ mmput(mm);
return r;
}
@@ -895,15 +917,18 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
"No user pages to check\n");
if (gtt->range) {
- r = hmm_range_valid(gtt->range);
- hmm_range_unregister(gtt->range);
-
+ /*
+ * FIXME: Must always hold notifier_lock for this, and must
+ * not ignore the return code.
+ */
+ r = mmu_interval_read_retry(gtt->range->notifier,
+ gtt->range->notifier_seq);
kvfree(gtt->range->pfns);
kfree(gtt->range);
gtt->range = NULL;
}
- return r;
+ return !r;
}
#endif
@@ -984,10 +1009,18 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
sg_free_table(ttm->sg);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
- if (gtt->range &&
- ttm->pages[0] == hmm_device_entry_to_page(gtt->range,
- gtt->range->pfns[0]))
- WARN_ONCE(1, "Missing get_user_page_done\n");
+ if (gtt->range) {
+ unsigned long i;
+
+ for (i = 0; i < ttm->num_pages; i++) {
+ if (ttm->pages[i] !=
+ hmm_device_entry_to_page(gtt->range,
+ gtt->range->pfns[i]))
+ break;
+ }
+
+ WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
+ }
#endif
}
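
The retry loop added to amdgpu_ttm_tt_get_user_pages() above is the standard pairing of hmm_range_fault() with the notifier's sequence count, and the nouveau conversion further down uses the same shape. Condensed (notifier_lock stands for whatever mutex the driver's invalidate callback takes; a real driver also bounds the loop with a timeout, as both amdgpu and nouveau do):

    again:
            range->notifier_seq = mmu_interval_read_begin(range->notifier);
            down_read(&mm->mmap_sem);
            ret = hmm_range_fault(range, 0);
            up_read(&mm->mmap_sem);
            if (ret <= 0) {
                    if (ret == 0 || ret == -EBUSY)
                            goto again;     /* raced with an invalidation */
                    return ret;             /* real error */
            }

            mutex_lock(&notifier_lock);
            if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
                    /* an invalidation ran between read_begin() and here */
                    mutex_unlock(&notifier_lock);
                    goto again;
            }
            /* pages are stable while notifier_lock is held: program the GPU */
            mutex_unlock(&notifier_lock);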
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index b22a10b2d201..1befdee9f0f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1444,7 +1444,6 @@ static int cik_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1479,12 +1478,7 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1494,14 +1488,17 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_STATUS1);
max_lw = (tmp & PCIE_LC_STATUS1__LC_DETECTED_LINK_WIDTH_MASK) >>
@@ -1525,15 +1522,23 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp |= PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1546,26 +1551,45 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE(ixPCIE_LC_CNTL4);
tmp &= ~PCIE_LC_CNTL4__LC_SET_QUIESCE_MASK;
@@ -1580,15 +1604,16 @@ static void cik_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~PCIE_LC_SPEED_CNTL__LC_FORCE_DIS_SW_SPEED_CHANGE_MASK;
WREG32_PCIE(ixPCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE(ixPCIE_LC_SPEED_CNTL);
speed_cntl |= PCIE_LC_SPEED_CNTL__LC_INITIATE_LINK_SPEED_CHANGE_MASK;
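
The cik.c hunks above and the si.c hunks that follow make the same mechanical substitution: instead of caching the PCIe capability offset from pci_pcie_cap() and poking config space with magic masks, they use the pcie_capability_*() accessors and the named PCI_EXP_LNKCTL2 field macros. In outline (pdev stands for either the GPU or its upstream bridge):

    /* before: manual capability offset plus magic numbers */
    int pos = pci_pcie_cap(pdev);
    pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL2, &tmp16);
    tmp16 &= ~0xf;
    tmp16 |= 3;                             /* gen3 target link speed */
    pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL2, tmp16);

    /* after: the PCI core locates the capability, and the field has a name */
    pcie_capability_read_word(pdev, PCI_EXP_LNKCTL2, &tmp16);
    tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
    tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT;     /* gen3 */
    pcie_capability_write_word(pdev, PCI_EXP_LNKCTL2, tmp16);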
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 29024e64c886..f2d70a47a3af 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1644,7 +1644,6 @@ static void si_init_golden_registers(struct amdgpu_device *adev)
static void si_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -1679,12 +1678,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with amdgpu.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(adev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(adev->pdev))
return;
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
@@ -1693,14 +1687,17 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -1717,15 +1714,23 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
}
for (i = 0; i < 10; i++) {
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -1737,25 +1742,44 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
mdelay(100);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
-
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
+
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(adev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -1768,15 +1792,16 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(adev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
+
if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
- tmp16 |= 3;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
- tmp16 |= 2;
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1;
- pci_write_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(adev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9af45d07515b..1544007af34a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -49,7 +49,7 @@ static const char kfd_dev_name[] = "kfd";
static const struct file_operations kfd_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = kfd_ioctl,
- .compat_ioctl = kfd_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.open = kfd_open,
.mmap = kfd_mmap,
};
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
index 20f4f92dd866..d7e65c869415 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c
@@ -160,12 +160,23 @@ static int dw_hdmi_i2s_get_dai_id(struct snd_soc_component *component,
return -EINVAL;
}
+static int dw_hdmi_i2s_hook_plugged_cb(struct device *dev, void *data,
+ hdmi_codec_plugged_cb fn,
+ struct device *codec_dev)
+{
+ struct dw_hdmi_i2s_audio_data *audio = data;
+ struct dw_hdmi *hdmi = audio->hdmi;
+
+ return dw_hdmi_set_plugged_cb(hdmi, fn, codec_dev);
+}
+
static struct hdmi_codec_ops dw_hdmi_i2s_ops = {
.hw_params = dw_hdmi_i2s_hw_params,
.audio_startup = dw_hdmi_i2s_audio_startup,
.audio_shutdown = dw_hdmi_i2s_audio_shutdown,
.get_eld = dw_hdmi_i2s_get_eld,
.get_dai_id = dw_hdmi_i2s_get_dai_id,
+ .hook_plugged_cb = dw_hdmi_i2s_hook_plugged_cb,
};
static int snd_dw_hdmi_probe(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index dbe38a54870b..67fca439bbfb 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -194,6 +194,10 @@ struct dw_hdmi {
struct mutex cec_notifier_mutex;
struct cec_notifier *cec_notifier;
+
+ hdmi_codec_plugged_cb plugged_cb;
+ struct device *codec_dev;
+ enum drm_connector_status last_connector_result;
};
#define HDMI_IH_PHY_STAT0_RX_SENSE \
@@ -218,6 +222,28 @@ static inline u8 hdmi_readb(struct dw_hdmi *hdmi, int offset)
return val;
}
+static void handle_plugged_change(struct dw_hdmi *hdmi, bool plugged)
+{
+ if (hdmi->plugged_cb && hdmi->codec_dev)
+ hdmi->plugged_cb(hdmi->codec_dev, plugged);
+}
+
+int dw_hdmi_set_plugged_cb(struct dw_hdmi *hdmi, hdmi_codec_plugged_cb fn,
+ struct device *codec_dev)
+{
+ bool plugged;
+
+ mutex_lock(&hdmi->mutex);
+ hdmi->plugged_cb = fn;
+ hdmi->codec_dev = codec_dev;
+ plugged = hdmi->last_connector_result == connector_status_connected;
+ handle_plugged_change(hdmi, plugged);
+ mutex_unlock(&hdmi->mutex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_set_plugged_cb);
+
static void hdmi_modb(struct dw_hdmi *hdmi, u8 data, u8 mask, unsigned reg)
{
regmap_update_bits(hdmi->regm, reg << hdmi->reg_shift, mask, data);
@@ -2229,6 +2255,7 @@ dw_hdmi_connector_detect(struct drm_connector *connector, bool force)
{
struct dw_hdmi *hdmi = container_of(connector, struct dw_hdmi,
connector);
+ enum drm_connector_status result;
mutex_lock(&hdmi->mutex);
hdmi->force = DRM_FORCE_UNSPECIFIED;
@@ -2236,7 +2263,18 @@ dw_hdmi_connector_detect(struct drm_connector *connector, bool force)
dw_hdmi_update_phy_mask(hdmi);
mutex_unlock(&hdmi->mutex);
- return hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data);
+ result = hdmi->phy.ops->read_hpd(hdmi, hdmi->phy.data);
+
+ mutex_lock(&hdmi->mutex);
+ if (result != hdmi->last_connector_result) {
+ dev_dbg(hdmi->dev, "read_hpd result: %d", result);
+ handle_plugged_change(hdmi,
+ result == connector_status_connected);
+ hdmi->last_connector_result = result;
+ }
+ mutex_unlock(&hdmi->mutex);
+
+ return result;
}
static int dw_hdmi_connector_get_modes(struct drm_connector *connector)
@@ -2731,6 +2769,7 @@ __dw_hdmi_probe(struct platform_device *pdev,
hdmi->rxsense = true;
hdmi->phy_mask = (u8)~(HDMI_PHY_HPD | HDMI_PHY_RX_SENSE);
hdmi->mc_clkdis = 0x7f;
+ hdmi->last_connector_result = connector_status_disconnected;
mutex_init(&hdmi->mutex);
mutex_init(&hdmi->audio_mutex);
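
The new plumbing above lets the ASoC hdmi-codec driver track hot-plug state: the codec hands a callback of type hdmi_codec_plugged_cb to the bridge through the new hook_plugged_cb op, dw_hdmi_set_plugged_cb() stores it, and handle_plugged_change() fires it on every detect transition (plus once immediately when the callback is installed). A codec-side consumer looks roughly like the stub below; only the callback signature comes from <sound/hdmi-codec.h>, the rest is illustrative:

    static void my_plugged_cb(struct device *codec_dev, bool plugged)
    {
            dev_dbg(codec_dev, "HDMI %s\n", plugged ? "plugged" : "unplugged");
            /* typically: report the new state to an ALSA jack here */
    }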
diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c
index aa3198dc9903..6f6d6d1e60ae 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -285,8 +285,8 @@ static int tfp410_get_connector_properties(struct tfp410 *dvi)
else
dvi->connector_type = DRM_MODE_CONNECTOR_DVID;
- dvi->hpd = fwnode_get_named_gpiod(&connector_node->fwnode,
- "hpd-gpios", 0, GPIOD_IN, "hpd");
+ dvi->hpd = fwnode_gpiod_get_index(&connector_node->fwnode,
+ "hpd", 0, GPIOD_IN, "hpd");
if (IS_ERR(dvi->hpd)) {
ret = PTR_ERR(dvi->hpd);
dvi->hpd = NULL;
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index 0965632008a9..2166000ed057 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -712,7 +712,7 @@ void drm_connector_list_iter_end(struct drm_connector_list_iter *iter)
__drm_connector_put_safe(iter->conn);
spin_unlock_irqrestore(&config->connector_list_lock, flags);
}
- lock_release(&connector_list_iter_dep_map, 0, _RET_IP_);
+ lock_release(&connector_list_iter_dep_map, _RET_IP_);
}
EXPORT_SYMBOL(drm_connector_list_iter_end);
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index 892ce636ef72..6ee04803c362 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -561,7 +561,7 @@ drm_property_create_blob(struct drm_device *dev, size_t length,
struct drm_property_blob *blob;
int ret;
- if (!length || length > ULONG_MAX - sizeof(struct drm_property_blob))
+ if (!length || length > INT_MAX - sizeof(struct drm_property_blob))
return ERR_PTR(-EINVAL);
blob = kvzalloc(sizeof(struct drm_property_blob)+length, GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index eea79125b3ea..438040ff0179 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -7,7 +7,6 @@ config DRM_I915_WERROR
# We use the dependency on !COMPILE_TEST to not be enabled in
# allmodconfig or allyesconfig configurations
depends on !COMPILE_TEST
- select HEADER_TEST
default n
help
Add -Werror to the build flags for (and only for) i915.ko.
@@ -22,7 +21,6 @@ config DRM_I915_DEBUG
depends on DRM_I915
select DEBUG_FS
select PREEMPT_COUNT
- select REFCOUNT_FULL
select I2C_CHARDEV
select STACKDEPOT
select DRM_DP_AUX_CHARDEV
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 707ac110e271..ce1b64f4dd44 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -5015,6 +5015,9 @@ void intel_power_domains_init_hw(struct drm_i915_private *i915, bool resume)
power_domains->initializing = true;
+ /* Must happen before power domain init on VLV/CHV */
+ intel_update_rawclk(i915);
+
if (INTEL_GEN(i915) >= 11) {
icl_display_core_init(i915, resume);
} else if (IS_CANNONLAKE(i915)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 4237a2887ff2..337ba17b1e0e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1687,7 +1687,7 @@ replace:
i915_gem_context_set_user_engines(ctx);
else
i915_gem_context_clear_user_engines(ctx);
- rcu_swap_protected(ctx->engines, set.engines, 1);
+ set.engines = rcu_replace_pointer(ctx->engines, set.engines, 1);
mutex_unlock(&ctx->engines_mutex);
call_rcu(&set.engines->rcu, free_engines_rcu);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index fd3ce6da8497..f2418a1cfe68 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -437,12 +437,12 @@ void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
fs_reclaim_acquire(GFP_KERNEL);
mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
- mutex_release(&mutex->dep_map, 0, _RET_IP_);
+ mutex_release(&mutex->dep_map, _RET_IP_);
fs_reclaim_release(GFP_KERNEL);
if (unlock)
- mutex_release(&i915->drm.struct_mutex.dep_map, 0, _RET_IP_);
+ mutex_release(&i915->drm.struct_mutex.dep_map, _RET_IP_);
}
#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
index 0e1ad4a4bd97..c1dd0cd3efc7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c
@@ -55,7 +55,7 @@ static inline unsigned long __timeline_mark_lock(struct intel_context *ce)
static inline void __timeline_mark_unlock(struct intel_context *ce,
unsigned long flags)
{
- mutex_release(&ce->timeline->mutex.dep_map, 0, _THIS_IP_);
+ mutex_release(&ce->timeline->mutex.dep_map, _THIS_IP_);
local_irq_restore(flags);
}
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pool.c b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
index 3cdbd5f8b5be..397186818305 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pool.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pool.c
@@ -104,6 +104,8 @@ node_create(struct intel_engine_pool *pool, size_t sz)
return ERR_CAST(obj);
}
+ i915_gem_object_set_readonly(obj);
+
node->obj = obj;
return node;
}
diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c
index 6e881c735b20..2b977991b785 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -200,14 +200,6 @@ static const struct drm_i915_mocs_entry broxton_mocs_table[] = {
MOCS_ENTRY(15, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(2) | LE_AOM(1), \
L3_3_WB), \
- /* Bypass LLC - Uncached (EHL+) */ \
- MOCS_ENTRY(16, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_1_UC), \
- /* Bypass LLC - L3 (Read-Only) (EHL+) */ \
- MOCS_ENTRY(17, \
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1), \
- L3_3_WB), \
/* Self-Snoop - L3 + LLC */ \
MOCS_ENTRY(18, \
LE_3_WB | LE_TC_1_LLC | LE_LRUM(3) | LE_SSE(3), \
@@ -271,7 +263,7 @@ static const struct drm_i915_mocs_entry tigerlake_mocs_table[] = {
L3_1_UC),
/* HW Special Case (Displayable) */
MOCS_ENTRY(61,
- LE_1_UC | LE_TC_1_LLC | LE_SCF(1),
+ LE_1_UC | LE_TC_1_LLC,
L3_3_WB),
};
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index a816aef6142b..e451298d11c3 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -499,8 +499,6 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
goto out_free_gem;
}
- i915_gem_object_put(obj);
-
ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR);
if (ret < 0) {
gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret);
@@ -525,6 +523,8 @@ int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
file_count(dmabuf->file),
kref_read(&obj->base.refcount));
+ i915_gem_object_put(obj);
+
return dmabuf_fd;
out_free_dmabuf:
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index dca15ace88f6..a19e7d89bc8a 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -384,7 +384,7 @@ void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
if (!__i915_active_fence_set(&ref->excl, f))
atomic_inc(&ref->count);
- mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
+ mutex_release(&ref->mutex.dep_map, _THIS_IP_);
}
bool i915_active_acquire_if_busy(struct i915_active *ref)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 87e05ca3646a..3c512c571e60 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -297,9 +297,6 @@ static int i915_driver_modeset_probe(struct drm_i915_private *i915)
if (ret)
goto cleanup_vga_client;
- /* must happen before intel_power_domains_init_hw() on VLV/CHV */
- intel_update_rawclk(i915);
-
intel_power_domains_init_hw(i915, false);
intel_csr_ucode_init(i915);
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 00011f9533b6..bbd71af00a91 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1502,7 +1502,7 @@ long i915_request_wait(struct i915_request *rq,
dma_fence_remove_callback(&rq->fence, &wait.cb);
out:
- mutex_release(&rq->engine->gt->reset.mutex.dep_map, 0, _THIS_IP_);
+ mutex_release(&rq->engine->gt->reset.mutex.dep_map, _THIS_IP_);
trace_i915_request_wait_end(rq);
return timeout;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 668d4bd0c118..df9bf1fd1bc0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -88,6 +88,7 @@ nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
}
struct nouveau_svmm {
+ struct mmu_notifier notifier;
struct nouveau_vmm *vmm;
struct {
unsigned long start;
@@ -95,9 +96,6 @@ struct nouveau_svmm {
} unmanaged;
struct mutex mutex;
-
- struct mm_struct *mm;
- struct hmm_mirror mirror;
};
#define SVMM_DBG(s,f,a...) \
@@ -251,10 +249,11 @@ nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
}
static int
-nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
- const struct mmu_notifier_range *update)
+nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn,
+ const struct mmu_notifier_range *update)
{
- struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror);
+ struct nouveau_svmm *svmm =
+ container_of(mn, struct nouveau_svmm, notifier);
unsigned long start = update->start;
unsigned long limit = update->end;
@@ -264,6 +263,9 @@ nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
mutex_lock(&svmm->mutex);
+ if (unlikely(!svmm->vmm))
+ goto out;
+
if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
if (start < svmm->unmanaged.start) {
nouveau_svmm_invalidate(svmm, start,
@@ -273,19 +275,20 @@ nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
}
nouveau_svmm_invalidate(svmm, start, limit);
+
+out:
mutex_unlock(&svmm->mutex);
return 0;
}
-static void
-nouveau_svmm_release(struct hmm_mirror *mirror)
+static void nouveau_svmm_free_notifier(struct mmu_notifier *mn)
{
+ kfree(container_of(mn, struct nouveau_svmm, notifier));
}
-static const struct hmm_mirror_ops
-nouveau_svmm = {
- .sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables,
- .release = nouveau_svmm_release,
+static const struct mmu_notifier_ops nouveau_mn_ops = {
+ .invalidate_range_start = nouveau_svmm_invalidate_range_start,
+ .free_notifier = nouveau_svmm_free_notifier,
};
void
@@ -293,8 +296,10 @@ nouveau_svmm_fini(struct nouveau_svmm **psvmm)
{
struct nouveau_svmm *svmm = *psvmm;
if (svmm) {
- hmm_mirror_unregister(&svmm->mirror);
- kfree(*psvmm);
+ mutex_lock(&svmm->mutex);
+ svmm->vmm = NULL;
+ mutex_unlock(&svmm->mutex);
+ mmu_notifier_put(&svmm->notifier);
*psvmm = NULL;
}
}
@@ -320,7 +325,7 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
mutex_lock(&cli->mutex);
if (cli->svm.cli) {
ret = -EBUSY;
- goto done;
+ goto out_free;
}
/* Allocate a new GPU VMM that can support SVM (managed by the
@@ -335,24 +340,26 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
.fault_replay = true,
}, sizeof(struct gp100_vmm_v0), &cli->svm.vmm);
if (ret)
- goto done;
-
- /* Enable HMM mirroring of CPU address-space to VMM. */
- svmm->mm = get_task_mm(current);
- down_write(&svmm->mm->mmap_sem);
- svmm->mirror.ops = &nouveau_svmm;
- ret = hmm_mirror_register(&svmm->mirror, svmm->mm);
- if (ret == 0) {
- cli->svm.svmm = svmm;
- cli->svm.cli = cli;
- }
- up_write(&svmm->mm->mmap_sem);
- mmput(svmm->mm);
+ goto out_free;
-done:
+ down_write(&current->mm->mmap_sem);
+ svmm->notifier.ops = &nouveau_mn_ops;
+ ret = __mmu_notifier_register(&svmm->notifier, current->mm);
if (ret)
- nouveau_svmm_fini(&svmm);
+ goto out_mm_unlock;
+ /* Note, ownership of svmm transfers to mmu_notifier */
+
+ cli->svm.svmm = svmm;
+ cli->svm.cli = cli;
+ up_write(&current->mm->mmap_sem);
mutex_unlock(&cli->mutex);
+ return 0;
+
+out_mm_unlock:
+ up_write(&current->mm->mmap_sem);
+out_free:
+ mutex_unlock(&cli->mutex);
+ kfree(svmm);
return ret;
}
@@ -475,43 +482,90 @@ nouveau_svm_fault_cache(struct nouveau_svm *svm,
fault->inst, fault->addr, fault->access);
}
-static inline bool
-nouveau_range_done(struct hmm_range *range)
+struct svm_notifier {
+ struct mmu_interval_notifier notifier;
+ struct nouveau_svmm *svmm;
+};
+
+static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- bool ret = hmm_range_valid(range);
+ struct svm_notifier *sn =
+ container_of(mni, struct svm_notifier, notifier);
- hmm_range_unregister(range);
- return ret;
+ /*
+ * serializes the update to mni->invalidate_seq done by caller and
+ * prevents invalidation of the PTE from progressing while HW is being
+ * programmed. This is very hacky and only works because the normal
+ * notifier that does invalidation is always called after the range
+ * notifier.
+ */
+ if (mmu_notifier_range_blockable(range))
+ mutex_lock(&sn->svmm->mutex);
+ else if (!mutex_trylock(&sn->svmm->mutex))
+ return false;
+ mmu_interval_set_seq(mni, cur_seq);
+ mutex_unlock(&sn->svmm->mutex);
+ return true;
}
-static int
-nouveau_range_fault(struct nouveau_svmm *svmm, struct hmm_range *range)
+static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
+ .invalidate = nouveau_svm_range_invalidate,
+};
+
+static int nouveau_range_fault(struct nouveau_svmm *svmm,
+ struct nouveau_drm *drm, void *data, u32 size,
+ u64 *pfns, struct svm_notifier *notifier)
{
+ unsigned long timeout =
+ jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+ /* Have HMM fault pages within the fault window to the GPU. */
+ struct hmm_range range = {
+ .notifier = &notifier->notifier,
+ .start = notifier->notifier.interval_tree.start,
+ .end = notifier->notifier.interval_tree.last + 1,
+ .pfns = pfns,
+ .flags = nouveau_svm_pfn_flags,
+ .values = nouveau_svm_pfn_values,
+ .pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT,
+ };
+ struct mm_struct *mm = notifier->notifier.mm;
long ret;
- range->default_flags = 0;
- range->pfn_flags_mask = -1UL;
+ while (true) {
+ if (time_after(jiffies, timeout))
+ return -EBUSY;
+
+ range.notifier_seq = mmu_interval_read_begin(range.notifier);
+ range.default_flags = 0;
+ range.pfn_flags_mask = -1UL;
+ down_read(&mm->mmap_sem);
+ ret = hmm_range_fault(&range, 0);
+ up_read(&mm->mmap_sem);
+ if (ret <= 0) {
+ if (ret == 0 || ret == -EBUSY)
+ continue;
+ return ret;
+ }
- ret = hmm_range_register(range, &svmm->mirror);
- if (ret) {
- up_read(&svmm->mm->mmap_sem);
- return (int)ret;
+ mutex_lock(&svmm->mutex);
+ if (mmu_interval_read_retry(range.notifier,
+ range.notifier_seq)) {
+ mutex_unlock(&svmm->mutex);
+ continue;
+ }
+ break;
}
- if (!hmm_range_wait_until_valid(range, HMM_RANGE_DEFAULT_TIMEOUT)) {
- up_read(&svmm->mm->mmap_sem);
- return -EBUSY;
- }
+ nouveau_dmem_convert_pfn(drm, &range);
- ret = hmm_range_fault(range, 0);
- if (ret <= 0) {
- if (ret == 0)
- ret = -EBUSY;
- up_read(&svmm->mm->mmap_sem);
- hmm_range_unregister(range);
- return ret;
- }
- return 0;
+ svmm->vmm->vmm.object.client->super = true;
+ ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL);
+ svmm->vmm->vmm.object.client->super = false;
+ mutex_unlock(&svmm->mutex);
+
+ return ret;
}
static int
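For readers new to the API, the loop in the rewritten nouveau_range_fault() above is the generic mmu_interval notifier retry pattern: sample a sequence count, fault the pages with hmm_range_fault(), then take the driver lock and retry if an invalidation ran in between. A condensed sketch of that pattern; the driver_map_pages() helper and the omitted timeout handling are illustrative only, not part of the patch:

static long example_range_fault(struct mmu_interval_notifier *mni,
				struct hmm_range *range,
				struct mutex *driver_lock)
{
	struct mm_struct *mm = mni->mm;
	long ret;

	do {
		range->notifier_seq = mmu_interval_read_begin(mni);
		down_read(&mm->mmap_sem);
		ret = hmm_range_fault(range, 0);
		up_read(&mm->mmap_sem);
		if (ret <= 0) {
			if (ret == 0 || ret == -EBUSY)
				continue;	/* PTEs changed, try again */
			return ret;
		}

		mutex_lock(driver_lock);
		if (!mmu_interval_read_retry(mni, range->notifier_seq))
			break;		/* still valid, keep the lock */
		mutex_unlock(driver_lock);
	} while (true);

	ret = driver_map_pages(range);	/* hypothetical: program the GPU */
	mutex_unlock(driver_lock);
	return ret;
}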
@@ -531,7 +585,6 @@ nouveau_svm_fault(struct nvif_notify *notify)
} i;
u64 phys[16];
} args;
- struct hmm_range range;
struct vm_area_struct *vma;
u64 inst, start, limit;
int fi, fn, pi, fill;
@@ -587,6 +640,9 @@ nouveau_svm_fault(struct nvif_notify *notify)
args.i.p.version = 0;
for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) {
+ struct svm_notifier notifier;
+ struct mm_struct *mm;
+
/* Cancel any faults from non-SVM channels. */
if (!(svmm = buffer->fault[fi]->svmm)) {
nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
@@ -606,24 +662,32 @@ nouveau_svm_fault(struct nvif_notify *notify)
start = max_t(u64, start, svmm->unmanaged.limit);
SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
+ mm = svmm->notifier.mm;
+ if (!mmget_not_zero(mm)) {
+ nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
+ continue;
+ }
+
/* Intersect fault window with the CPU VMA, cancelling
* the fault if the address is invalid.
*/
- down_read(&svmm->mm->mmap_sem);
- vma = find_vma_intersection(svmm->mm, start, limit);
+ down_read(&mm->mmap_sem);
+ vma = find_vma_intersection(mm, start, limit);
if (!vma) {
SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit);
- up_read(&svmm->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
continue;
}
start = max_t(u64, start, vma->vm_start);
limit = min_t(u64, limit, vma->vm_end);
+ up_read(&mm->mmap_sem);
SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
if (buffer->fault[fi]->addr != start) {
SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr);
- up_read(&svmm->mm->mmap_sem);
+ mmput(mm);
nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
continue;
}
@@ -679,33 +743,19 @@ nouveau_svm_fault(struct nvif_notify *notify)
args.i.p.addr,
args.i.p.addr + args.i.p.size, fn - fi);
- /* Have HMM fault pages within the fault window to the GPU. */
- range.start = args.i.p.addr;
- range.end = args.i.p.addr + args.i.p.size;
- range.pfns = args.phys;
- range.flags = nouveau_svm_pfn_flags;
- range.values = nouveau_svm_pfn_values;
- range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
-again:
- ret = nouveau_range_fault(svmm, &range);
- if (ret == 0) {
- mutex_lock(&svmm->mutex);
- if (!nouveau_range_done(&range)) {
- mutex_unlock(&svmm->mutex);
- goto again;
- }
-
- nouveau_dmem_convert_pfn(svm->drm, &range);
-
- svmm->vmm->vmm.object.client->super = true;
- ret = nvif_object_ioctl(&svmm->vmm->vmm.object,
- &args, sizeof(args.i) +
- pi * sizeof(args.phys[0]),
- NULL);
- svmm->vmm->vmm.object.client->super = false;
- mutex_unlock(&svmm->mutex);
- up_read(&svmm->mm->mmap_sem);
+ notifier.svmm = svmm;
+ ret = mmu_interval_notifier_insert(&notifier.notifier,
+ svmm->notifier.mm,
+ args.i.p.addr, args.i.p.size,
+ &nouveau_svm_mni_ops);
+ if (!ret) {
+ ret = nouveau_range_fault(
+ svmm, svm->drm, &args,
+ sizeof(args.i) + pi * sizeof(args.phys[0]),
+ args.phys, &notifier);
+ mmu_interval_notifier_remove(&notifier.notifier);
}
+ mmput(mm);
/* Cancel any faults in the window whose pages didn't manage
* to keep their valid bit, or stay writeable when required.
@@ -714,10 +764,10 @@ again:
*/
while (fi < fn) {
struct nouveau_svm_fault *fault = buffer->fault[fi++];
- pi = (fault->addr - range.start) >> PAGE_SHIFT;
+ pi = (fault->addr - args.i.p.addr) >> PAGE_SHIFT;
if (ret ||
- !(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_V) ||
- (!(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_W) &&
+ !(args.phys[pi] & NVIF_VMM_PFNMAP_V0_V) ||
+ (!(args.phys[pi] & NVIF_VMM_PFNMAP_V0_W) &&
fault->access != 0 && fault->access != 3)) {
nouveau_svm_fault_cancel_fault(svm, fault);
continue;
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index acabeaf28732..40a7e702c2a9 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -9500,7 +9500,6 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
struct pci_dev *root = rdev->pdev->bus->self;
enum pci_bus_speed speed_cap;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -9542,12 +9541,7 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(rdev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
return;
if (speed_cap == PCIE_SPEED_8_0GT) {
@@ -9557,14 +9551,17 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -9582,15 +9579,23 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -9603,26 +9608,45 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -9636,15 +9660,15 @@ static void cik_pcie_gen3_enable(struct radeon_device *rdev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
if (speed_cap == PCIE_SPEED_8_0GT)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (speed_cap == PCIE_SPEED_5_0GT)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
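The cik.c hunks above (and the matching si.c hunks below) mechanically replace open-coded config-space accesses at a cached capability offset with the pcie_capability_*() helpers. Where only a single read-modify-write is needed, pcie_capability_clear_and_set_word() can express the same update in one call; a sketch of the target-link-speed write above, illustrative only since the patch keeps the explicit read/modify/write sequence:

static void example_set_target_link_speed(struct radeon_device *rdev,
					  enum pci_bus_speed speed_cap)
{
	u16 tls = PCI_EXP_LNKCTL2_TLS_2_5GT;		/* gen1 default */

	if (speed_cap == PCIE_SPEED_8_0GT)
		tls = PCI_EXP_LNKCTL2_TLS_8_0GT;	/* gen3 */
	else if (speed_cap == PCIE_SPEED_5_0GT)
		tls = PCI_EXP_LNKCTL2_TLS_5_0GT;	/* gen2 */

	pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
					   PCI_EXP_LNKCTL2_TLS, tls);
}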
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index d59b004f6695..30e32adc1fc6 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -68,6 +68,10 @@
#include <linux/hashtable.h>
#include <linux/dma-fence.h>
+#ifdef CONFIG_MMU_NOTIFIER
+#include <linux/mmu_notifier.h>
+#endif
+
#include <drm/ttm/ttm_bo_api.h>
#include <drm/ttm/ttm_bo_driver.h>
#include <drm/ttm/ttm_placement.h>
@@ -509,8 +513,9 @@ struct radeon_bo {
struct ttm_bo_kmap_obj dma_buf_vmap;
pid_t pid;
- struct radeon_mn *mn;
- struct list_head mn_list;
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_interval_notifier notifier;
+#endif
};
#define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, tbo.base)
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index dbab9a3a969b..f93829f08a4d 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -36,131 +36,51 @@
#include "radeon.h"
-struct radeon_mn {
- struct mmu_notifier mn;
-
- /* objects protected by lock */
- struct mutex lock;
- struct rb_root_cached objects;
-};
-
-struct radeon_mn_node {
- struct interval_tree_node it;
- struct list_head bos;
-};
-
/**
- * radeon_mn_invalidate_range_start - callback to notify about mm change
+ * radeon_mn_invalidate - callback to notify about mm change
*
* @mn: our notifier
- * @mn: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @range: the mmu_notifier range covering the invalidated addresses
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
*
* We block for all BOs between start and end to be idle and
* unmap them by move them into system domain again.
*/
-static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
+static bool radeon_mn_invalidate(struct mmu_interval_notifier *mn,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
- struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
+ struct radeon_bo *bo = container_of(mn, struct radeon_bo, notifier);
struct ttm_operation_ctx ctx = { false, false };
- struct interval_tree_node *it;
- unsigned long end;
- int ret = 0;
-
- /* notification is exclusive, but interval is inclusive */
- end = range->end - 1;
-
- /* TODO we should be able to split locking for interval tree and
- * the tear down.
- */
- if (mmu_notifier_range_blockable(range))
- mutex_lock(&rmn->lock);
- else if (!mutex_trylock(&rmn->lock))
- return -EAGAIN;
-
- it = interval_tree_iter_first(&rmn->objects, range->start, end);
- while (it) {
- struct radeon_mn_node *node;
- struct radeon_bo *bo;
- long r;
-
- if (!mmu_notifier_range_blockable(range)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
-
- node = container_of(it, struct radeon_mn_node, it);
- it = interval_tree_iter_next(it, range->start, end);
+ long r;
- list_for_each_entry(bo, &node->bos, mn_list) {
+ if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
+ return true;
- if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound)
- continue;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- r = radeon_bo_reserve(bo, true);
- if (r) {
- DRM_ERROR("(%ld) failed to reserve user bo\n", r);
- continue;
- }
-
- r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv,
- true, false, MAX_SCHEDULE_TIMEOUT);
- if (r <= 0)
- DRM_ERROR("(%ld) failed to wait for user bo\n", r);
-
- radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- DRM_ERROR("(%ld) failed to validate user bo\n", r);
-
- radeon_bo_unreserve(bo);
- }
+ r = radeon_bo_reserve(bo, true);
+ if (r) {
+ DRM_ERROR("(%ld) failed to reserve user bo\n", r);
+ return true;
}
-
-out_unlock:
- mutex_unlock(&rmn->lock);
-
- return ret;
-}
-
-static void radeon_mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
-{
- struct mmu_notifier_range range = {
- .mm = mm,
- .start = 0,
- .end = ULONG_MAX,
- .flags = 0,
- .event = MMU_NOTIFY_UNMAP,
- };
-
- radeon_mn_invalidate_range_start(mn, &range);
-}
-
-static struct mmu_notifier *radeon_mn_alloc_notifier(struct mm_struct *mm)
-{
- struct radeon_mn *rmn;
- rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
- if (!rmn)
- return ERR_PTR(-ENOMEM);
+ r = dma_resv_wait_timeout_rcu(bo->tbo.base.resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ if (r <= 0)
+ DRM_ERROR("(%ld) failed to wait for user bo\n", r);
- mutex_init(&rmn->lock);
- rmn->objects = RB_ROOT_CACHED;
- return &rmn->mn;
-}
+ radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ DRM_ERROR("(%ld) failed to validate user bo\n", r);
-static void radeon_mn_free_notifier(struct mmu_notifier *mn)
-{
- kfree(container_of(mn, struct radeon_mn, mn));
+ radeon_bo_unreserve(bo);
+ return true;
}
-static const struct mmu_notifier_ops radeon_mn_ops = {
- .release = radeon_mn_release,
- .invalidate_range_start = radeon_mn_invalidate_range_start,
- .alloc_notifier = radeon_mn_alloc_notifier,
- .free_notifier = radeon_mn_free_notifier,
+static const struct mmu_interval_notifier_ops radeon_mn_ops = {
+ .invalidate = radeon_mn_invalidate,
};
/**
@@ -174,51 +94,20 @@ static const struct mmu_notifier_ops radeon_mn_ops = {
*/
int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
{
- unsigned long end = addr + radeon_bo_size(bo) - 1;
- struct mmu_notifier *mn;
- struct radeon_mn *rmn;
- struct radeon_mn_node *node = NULL;
- struct list_head bos;
- struct interval_tree_node *it;
-
- mn = mmu_notifier_get(&radeon_mn_ops, current->mm);
- if (IS_ERR(mn))
- return PTR_ERR(mn);
- rmn = container_of(mn, struct radeon_mn, mn);
-
- INIT_LIST_HEAD(&bos);
-
- mutex_lock(&rmn->lock);
-
- while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
- kfree(node);
- node = container_of(it, struct radeon_mn_node, it);
- interval_tree_remove(&node->it, &rmn->objects);
- addr = min(it->start, addr);
- end = max(it->last, end);
- list_splice(&node->bos, &bos);
- }
-
- if (!node) {
- node = kmalloc(sizeof(struct radeon_mn_node), GFP_KERNEL);
- if (!node) {
- mutex_unlock(&rmn->lock);
- return -ENOMEM;
- }
- }
-
- bo->mn = rmn;
-
- node->it.start = addr;
- node->it.last = end;
- INIT_LIST_HEAD(&node->bos);
- list_splice(&bos, &node->bos);
- list_add(&bo->mn_list, &node->bos);
-
- interval_tree_insert(&node->it, &rmn->objects);
-
- mutex_unlock(&rmn->lock);
-
+ int ret;
+
+ ret = mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
+ radeon_bo_size(bo), &radeon_mn_ops);
+ if (ret)
+ return ret;
+
+ /*
+ * FIXME: radeon appears to allow get_user_pages to run during
+ * invalidate_range_start/end, which is not a safe way to read the
+ * PTEs. It should use the mmu_interval_read_begin() scheme around the
+ * get_user_pages to ensure that the PTEs are read properly
+ */
+ mmu_interval_read_begin(&bo->notifier);
return 0;
}
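For context, the scheme the FIXME above asks for looks roughly like the following (assumes CONFIG_MMU_NOTIFIER); the helper name and the omitted partial-pin handling are hypothetical, not part of the patch:

static long example_radeon_pin_user_pages(struct radeon_bo *bo,
					  unsigned long start, long npages,
					  struct page **pages)
{
	unsigned long seq;
	long pinned;

retry:
	seq = mmu_interval_read_begin(&bo->notifier);
	down_read(&current->mm->mmap_sem);
	pinned = get_user_pages(start, npages, FOLL_WRITE, pages, NULL);
	up_read(&current->mm->mmap_sem);
	if (pinned < 0)
		return pinned;

	/* A real implementation would also hold the lock taken by the
	 * invalidate callback while binding the pages to the GPU. */
	if (mmu_interval_read_retry(&bo->notifier, seq)) {
		release_pages(pages, pinned);
		goto retry;
	}
	return pinned;
}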
@@ -231,27 +120,8 @@ int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
*/
void radeon_mn_unregister(struct radeon_bo *bo)
{
- struct radeon_mn *rmn = bo->mn;
- struct list_head *head;
-
- if (!rmn)
+ if (!bo->notifier.mm)
return;
-
- mutex_lock(&rmn->lock);
- /* save the next list entry for later */
- head = bo->mn_list.next;
-
- list_del(&bo->mn_list);
-
- if (list_empty(head)) {
- struct radeon_mn_node *node;
- node = container_of(head, struct radeon_mn_node, bos);
- interval_tree_remove(&node->it, &rmn->objects);
- kfree(node);
- }
-
- mutex_unlock(&rmn->lock);
-
- mmu_notifier_put(&rmn->mn);
- bo->mn = NULL;
+ mmu_interval_notifier_remove(&bo->notifier);
+ bo->notifier.mm = NULL;
}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 1d8efb0eefdb..d7eea75b2c27 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -3257,7 +3257,7 @@ static void si_gpu_init(struct radeon_device *rdev)
/* XXX what about 12? */
rdev->config.si.tile_config |= (3 << 0);
break;
- }
+ }
switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
case 0: /* four banks */
rdev->config.si.tile_config |= 0 << 4;
@@ -7087,7 +7087,6 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
struct pci_dev *root = rdev->pdev->bus->self;
enum pci_bus_speed speed_cap;
- int bridge_pos, gpu_pos;
u32 speed_cntl, current_data_rate;
int i;
u16 tmp16;
@@ -7129,12 +7128,7 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
}
- bridge_pos = pci_pcie_cap(root);
- if (!bridge_pos)
- return;
-
- gpu_pos = pci_pcie_cap(rdev->pdev);
- if (!gpu_pos)
+ if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
return;
if (speed_cap == PCIE_SPEED_8_0GT) {
@@ -7144,14 +7138,17 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
u16 bridge_cfg2, gpu_cfg2;
u32 max_lw, current_lw, tmp;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL,
+ &gpu_cfg);
tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root, PCI_EXP_LNKCTL, tmp16);
tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL,
+ tmp16);
tmp = RREG32_PCIE(PCIE_LC_STATUS1);
max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
@@ -7169,15 +7166,23 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
for (i = 0; i < 10; i++) {
/* check status */
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_DEVSTA,
+ &tmp16);
if (tmp16 & PCI_EXP_DEVSTA_TRPND)
break;
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &bridge_cfg);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &gpu_cfg);
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &bridge_cfg2);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &gpu_cfg2);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp |= LC_SET_QUIESCE;
@@ -7190,26 +7195,46 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
msleep(100);
/* linkctl */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL,
+ tmp16);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ &tmp16);
tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
+ pcie_capability_write_word(rdev->pdev,
+ PCI_EXP_LNKCTL,
+ tmp16);
/* linkctl2 */
- pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
-
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~((1 << 4) | (7 << 9));
- tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (bridge_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(root,
+ PCI_EXP_LNKCTL2,
+ tmp16);
+
+ pcie_capability_read_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ &tmp16);
+ tmp16 &= ~(PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN);
+ tmp16 |= (gpu_cfg2 &
+ (PCI_EXP_LNKCTL2_ENTER_COMP |
+ PCI_EXP_LNKCTL2_TX_MARGIN));
+ pcie_capability_write_word(rdev->pdev,
+ PCI_EXP_LNKCTL2,
+ tmp16);
tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
tmp &= ~LC_SET_QUIESCE;
@@ -7223,15 +7248,15 @@ static void si_pcie_gen3_enable(struct radeon_device *rdev)
speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
- pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
- tmp16 &= ~0xf;
+ pcie_capability_read_word(rdev->pdev, PCI_EXP_LNKCTL2, &tmp16);
+ tmp16 &= ~PCI_EXP_LNKCTL2_TLS;
if (speed_cap == PCIE_SPEED_8_0GT)
- tmp16 |= 3; /* gen3 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
else if (speed_cap == PCIE_SPEED_5_0GT)
- tmp16 |= 2; /* gen2 */
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
else
- tmp16 |= 1; /* gen1 */
- pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
+ tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
+ pcie_capability_write_word(rdev->pdev, PCI_EXP_LNKCTL2, tmp16);
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index fad72799b8df..42651d737c55 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -489,7 +489,7 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
WARN_ON(!tcon->quirks->has_channel_0);
- tcon->dclk_min_div = 6;
+ tcon->dclk_min_div = 1;
tcon->dclk_max_div = 127;
sun4i_tcon0_mode_set_common(tcon, mode);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 4b34a278d65b..11863fbdd5d6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>
-#define TTM_BO_VM_NUM_PREFAULT 16
-
static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{
@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
}
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mmap_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map() and unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for the GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly,
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ * 0 on success and the bo was reserved.
+ * VM_FAULT_RETRY if blocking wait.
+ * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+ struct vm_fault *vmf)
{
- struct vm_area_struct *vma = vmf->vma;
- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
- vma->vm_private_data;
- struct ttm_bo_device *bdev = bo->bdev;
- unsigned long page_offset;
- unsigned long page_last;
- unsigned long pfn;
- struct ttm_tt *ttm = NULL;
- struct page *page;
- int err;
- int i;
- vm_fault_t ret = VM_FAULT_NOPAGE;
- unsigned long address = vmf->address;
- struct ttm_mem_type_manager *man =
- &bdev->man[bo->mem.mem_type];
- struct vm_area_struct cvma;
-
/*
* Work around locking order reversal in fault / nopfn
* between mmap_sem and bo_reserve: Perform a trylock operation
@@ -151,14 +154,54 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
+ return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvise settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ * VM_FAULT_NOPAGE on success or pending signal
+ * VM_FAULT_SIGBUS on unspecified error
+ * VM_FAULT_OOM on out-of-memory
+ * VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+ pgprot_t prot,
+ pgoff_t num_prefault)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct vm_area_struct cvma = *vma;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ struct ttm_bo_device *bdev = bo->bdev;
+ unsigned long page_offset;
+ unsigned long page_last;
+ unsigned long pfn;
+ struct ttm_tt *ttm = NULL;
+ struct page *page;
+ int err;
+ pgoff_t i;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
+ unsigned long address = vmf->address;
+ struct ttm_mem_type_manager *man =
+ &bdev->man[bo->mem.mem_type];
+
/*
* Refuse to fault imported pages. This should be handled
* (if at all) by redirecting mmap to the exporter.
*/
- if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
- }
+ if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
+ return VM_FAULT_SIGBUS;
if (bdev->driver->fault_reserve_notify) {
struct dma_fence *moving = dma_fence_get(bo->moving);
@@ -169,11 +212,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
break;
case -EBUSY:
case -ERESTARTSYS:
- ret = VM_FAULT_NOPAGE;
- goto out_unlock;
+ return VM_FAULT_NOPAGE;
default:
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
+ return VM_FAULT_SIGBUS;
}
if (bo->moving != moving) {
@@ -189,21 +230,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
* move.
*/
ret = ttm_bo_vm_fault_idle(bo, vmf);
- if (unlikely(ret != 0)) {
- if (ret == VM_FAULT_RETRY &&
- !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
- /* The BO has already been unreserved. */
- return ret;
- }
-
- goto out_unlock;
- }
+ if (unlikely(ret != 0))
+ return ret;
err = ttm_mem_io_lock(man, true);
- if (unlikely(err != 0)) {
- ret = VM_FAULT_NOPAGE;
- goto out_unlock;
- }
+ if (unlikely(err != 0))
+ return VM_FAULT_NOPAGE;
err = ttm_mem_io_reserve_vm(bo);
if (unlikely(err != 0)) {
ret = VM_FAULT_SIGBUS;
@@ -220,18 +252,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
goto out_io_unlock;
}
- /*
- * Make a local vma copy to modify the page_prot member
- * and vm_flags if necessary. The vma parameter is protected
- * by mmap_sem in write mode.
- */
- cvma = *vma;
- cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
-
- if (bo->mem.bus.is_iomem) {
- cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
- cvma.vm_page_prot);
- } else {
+ cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
+ if (!bo->mem.bus.is_iomem) {
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false,
@@ -240,24 +262,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
};
ttm = bo->ttm;
- cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
- cvma.vm_page_prot);
-
- /* Allocate all page at once, most common usage */
- if (ttm_tt_populate(ttm, &ctx)) {
+ if (ttm_tt_populate(bo->ttm, &ctx)) {
ret = VM_FAULT_OOM;
goto out_io_unlock;
}
+ } else {
+ /* Iomem should not be marked encrypted */
+ cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
}
/*
* Speculatively prefault a number of pages. Only error on
* first page.
*/
- for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
+ for (i = 0; i < num_prefault; ++i) {
if (bo->mem.bus.is_iomem) {
- /* Iomem should not be marked encrypted */
- cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
} else {
page = ttm->pages[page_offset];
@@ -293,28 +312,49 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
out_io_unlock:
ttm_mem_io_unlock(man);
-out_unlock:
+ return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
+
+static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ pgprot_t prot;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ vm_fault_t ret;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ prot = vm_get_page_prot(vma->vm_flags);
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
dma_resv_unlock(bo->base.resv);
+
return ret;
}
-static void ttm_bo_vm_open(struct vm_area_struct *vma)
+void ttm_bo_vm_open(struct vm_area_struct *vma)
{
- struct ttm_buffer_object *bo =
- (struct ttm_buffer_object *)vma->vm_private_data;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
ttm_bo_get(bo);
}
+EXPORT_SYMBOL(ttm_bo_vm_open);
-static void ttm_bo_vm_close(struct vm_area_struct *vma)
+void ttm_bo_vm_close(struct vm_area_struct *vma)
{
- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
ttm_bo_put(bo);
vma->vm_private_data = NULL;
}
+EXPORT_SYMBOL(ttm_bo_vm_close);
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
unsigned long offset,
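With ttm_bo_vm_reserve(), ttm_bo_vm_fault_reserved(), ttm_bo_vm_open() and ttm_bo_vm_close() now exported, a driver can wrap the generic fault path with its own page-protection or prefault policy, which is what the vmwgfx changes below do. A hedged sketch of such a wrapper; my_driver_adjust_prot() is hypothetical, and TTM_BO_VM_NUM_PREFAULT is assumed to be available from the TTM headers after this change:

static vm_fault_t my_driver_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	pgprot_t prot;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	/* Hypothetical driver hook deciding caching/protection bits. */
	prot = my_driver_adjust_prot(bo, vm_get_page_prot(vma->vm_flags));
	ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;	/* reservation already dropped on this path */

	dma_resv_unlock(bo->base.resv);
	return ret;
}

static const struct vm_operations_struct my_driver_vm_ops = {
	.fault = my_driver_bo_vm_fault,
	.open = ttm_bo_vm_open,
	.close = ttm_bo_vm_close,
};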
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index 6b28a326f8bb..15acdf2a7c0f 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -8,6 +8,7 @@ config DRM_VMWGFX
select FB_CFB_IMAGEBLIT
select DRM_TTM
select FB
+ select MAPPING_DIRTY_HELPERS
# Only needed for the transitional use of drm_crtc_init - can be removed
# again once vmwgfx sets up the primary plane itself.
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 8841bd30e1e5..c877a21a0739 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
- vmwgfx_validation.o \
+ vmwgfx_validation.o vmwgfx_page_dirty.o \
ttm_object.o ttm_lock.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
}
-
static inline u32
svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
}
+/**
+ * struct svga3dsurface_mip - Mipmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+ size_t bytes;
+ size_t img_stride;
+ size_t row_stride;
+ struct drm_vmw_size size;
+
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+ const struct svga3d_surface_desc *desc;
+ struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+ size_t mip_chain_bytes;
+ size_t sheet_bytes;
+ u32 num_mip_levels;
+ u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+ u32 sub_resource;
+ u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+ u32 mip_level, u32 layer)
+{
+ return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @num_samples: Number of samples.
+ * @cache: Pointer to a struct svga3dsurface_cache object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+ SVGA3dSurfaceFormat format,
+ u32 num_mip_levels,
+ u32 num_layers,
+ u32 num_samples,
+ struct svga3dsurface_cache *cache)
+{
+ const struct svga3d_surface_desc *desc;
+ u32 i;
+
+ memset(cache, 0, sizeof(*cache));
+ cache->desc = desc = svga3dsurface_get_desc(format);
+ cache->num_mip_levels = num_mip_levels;
+ cache->num_layers = num_layers;
+ for (i = 0; i < cache->num_mip_levels; i++) {
+ struct svga3dsurface_mip *mip = &cache->mip[i];
+
+ mip->size = svga3dsurface_get_mip_size(*size, i);
+ mip->bytes = svga3dsurface_get_image_buffer_size
+ (desc, &mip->size, 0);
+ mip->row_stride =
+ __KERNEL_DIV_ROUND_UP(mip->size.width,
+ desc->block_size.width) *
+ desc->bytes_per_block * num_samples;
+ if (!mip->row_stride)
+ goto invalid_dim;
+
+ mip->img_stride =
+ __KERNEL_DIV_ROUND_UP(mip->size.height,
+ desc->block_size.height) *
+ mip->row_stride;
+ if (!mip->img_stride)
+ goto invalid_dim;
+
+ cache->mip_chain_bytes += mip->bytes;
+ }
+ cache->sheet_bytes = cache->mip_chain_bytes * num_layers;
+ if (!cache->sheet_bytes)
+ goto invalid_dim;
+
+ return 0;
+
+invalid_dim:
+ VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n");
+ return -EINVAL;
+}
+
+/**
+ * svga3dsurface_get_loc - Get a surface location from an offset into the
+ * backing store
+ * @cache: Surface layout data.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ * @offset: Offset into the surface backing store.
+ */
+static inline void
+svga3dsurface_get_loc(const struct svga3dsurface_cache *cache,
+ struct svga3dsurface_loc *loc,
+ size_t offset)
+{
+ const struct svga3dsurface_mip *mip = &cache->mip[0];
+ const struct svga3d_surface_desc *desc = cache->desc;
+ u32 layer;
+ int i;
+
+ if (offset >= cache->sheet_bytes)
+ offset %= cache->sheet_bytes;
+
+ layer = offset / cache->mip_chain_bytes;
+ offset -= layer * cache->mip_chain_bytes;
+ for (i = 0; i < cache->num_mip_levels; ++i, ++mip) {
+ if (mip->bytes > offset)
+ break;
+ offset -= mip->bytes;
+ }
+
+ loc->sub_resource = svga3dsurface_subres(cache, i, layer);
+ loc->z = offset / mip->img_stride;
+ offset -= loc->z * mip->img_stride;
+ loc->z *= desc->block_size.depth;
+ loc->y = offset / mip->row_stride;
+ offset -= loc->y * mip->row_stride;
+ loc->y *= desc->block_size.height;
+ loc->x = offset / desc->bytes_per_block;
+ loc->x *= desc->block_size.width;
+}
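A minimal usage sketch of the two helpers above, assuming a hypothetical 64x64, four-mip, single-layer, single-sample SVGA3D_R8G8B8A8_UNORM surface (the concrete format and dimensions are illustrative):

static int example_locate(size_t byte_offset, struct svga3dsurface_loc *loc)
{
	struct drm_vmw_size base = { .width = 64, .height = 64, .depth = 1 };
	struct svga3dsurface_cache cache;
	int ret;

	ret = svga3dsurface_setup_cache(&base, SVGA3D_R8G8B8A8_UNORM,
					4 /* mips */, 1 /* layer */,
					1 /* sample */, &cache);
	if (ret)
		return ret;

	/* Translate a dirty byte in the backing store to mip/layer/x/y/z. */
	svga3dsurface_get_loc(&cache, loc, byte_offset);
	return 0;
}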
+
+/**
+ * svga3dsurface_inc_loc - Clamp increment a surface location with one block
+ * size in each dimension.
+ * @cache: Surface layout data.
+ * @loc: Pointer to a struct svga3dsurface_loc to be incremented.
+ *
+ * When computing the size of a range as size = end - start, the range does not
+ * include the end element. However, a location representing the last byte
+ * of a touched region in the backing store *is* included in the range.
+ * This function modifies such a location to match the end definition
+ * given as start + size, which is the one used in an SVGA3dBox.
+ */
+static inline void
+svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache,
+ struct svga3dsurface_loc *loc)
+{
+ const struct svga3d_surface_desc *desc = cache->desc;
+ u32 mip = loc->sub_resource % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+
+ loc->sub_resource++;
+ loc->x += desc->block_size.width;
+ if (loc->x > size->width)
+ loc->x = size->width;
+ loc->y += desc->block_size.height;
+ if (loc->y > size->height)
+ loc->y = size->height;
+ loc->z += desc->block_size.depth;
+ if (loc->z > size->depth)
+ loc->z = size->depth;
+}
+
+/**
+ * svga3dsurface_min_loc - The start location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ */
+static inline void
+svga3dsurface_min_loc(const struct svga3dsurface_cache *cache,
+ u32 sub_resource,
+ struct svga3dsurface_loc *loc)
+{
+ loc->sub_resource = sub_resource;
+ loc->x = loc->y = loc->z = 0;
+}
+
+/**
+ * svga3dsurface_max_loc - The end location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ *
+ * Following the end definition given in svga3dsurface_inc_loc(),
+ * compute the end location of a surface subresource.
+ */
+static inline void
+svga3dsurface_max_loc(const struct svga3dsurface_cache *cache,
+ u32 sub_resource,
+ struct svga3dsurface_loc *loc)
+{
+ const struct drm_vmw_size *size;
+ u32 mip;
+
+ loc->sub_resource = sub_resource + 1;
+ mip = sub_resource % cache->num_mip_levels;
+ size = &cache->mip[mip].size;
+ loc->x = size->width;
+ loc->y = size->height;
+ loc->z = size->depth;
+}
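Together, svga3dsurface_get_loc(), svga3dsurface_inc_loc() and svga3dsurface_min_loc()/svga3dsurface_max_loc() let a dirty byte range be turned into a device SVGA3dBox. A hypothetical helper showing the end-exclusive convention for two locations in the same mip level (not part of the patch):

static inline void example_box_from_locs(const struct svga3dsurface_loc *start,
					 const struct svga3dsurface_loc *end,
					 SVGA3dBox *box)
{
	box->x = start->x;
	box->y = start->y;
	box->z = start->z;
	box->w = end->x - start->x;	/* end follows the start + size rule */
	box->h = end->y - start->y;
	box->d = end->z - start->z;
}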
+
#endif /* _SVGA3D_SURFACEDEFS_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 74016a08d118..8b71bf6b58ef 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -462,6 +462,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
{
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
+ WARN_ON(vmw_bo->dirty);
+ WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
}
@@ -475,8 +477,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
{
struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
+ struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
- vmw_bo_unmap(&vmw_user_bo->vbo);
+ WARN_ON(vbo->dirty);
+ WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
+ vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
}
@@ -511,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
-
- INIT_LIST_HEAD(&vmw_bo->res_list);
+ vmw_bo->res_tree = RB_ROOT;
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index b18842f73081..a31e726d6d71 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -56,9 +56,9 @@
#define VMWGFX_DRIVER_NAME "vmwgfx"
-#define VMWGFX_DRIVER_DATE "20180704"
+#define VMWGFX_DRIVER_DATE "20190328"
#define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 15
+#define VMWGFX_DRIVER_MINOR 16
#define VMWGFX_DRIVER_PATCHLEVEL 0
#define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
#define VMWGFX_MAX_RELOCATIONS 2048
@@ -100,17 +100,18 @@ struct vmw_fpriv {
/**
* struct vmw_buffer_object - TTM buffer object with vmwgfx additions
* @base: The TTM buffer object
- * @res_list: List of resources using this buffer object as a backing MOB
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
* @pin_count: pin depth
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
* @map: Kmap object for semi-persistent mappings
* @res_prios: Eviction priority counts for attached resources
+ * @dirty: structure for user-space dirty-tracking
*/
struct vmw_buffer_object {
struct ttm_buffer_object base;
- struct list_head res_list;
+ struct rb_root res_tree;
s32 pin_count;
atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */
@@ -118,6 +119,7 @@ struct vmw_buffer_object {
/* Protected by reservation */
struct ttm_bo_kmap_obj map;
u32 res_prios[TTM_MAX_BO_PRIORITY];
+ struct vmw_bo_dirty *dirty;
};
/**
@@ -148,7 +150,8 @@ struct vmw_res_func;
* @res_dirty: Resource contains data not yet in the backup buffer. Protected
* by resource reserved.
* @backup_dirty: Backup buffer contains data not yet in the HW resource.
- * Protecte by resource reserved.
+ * Protected by resource reserved.
+ * @coherent: Emulate coherency by tracking vm accesses.
* @backup: The backup buffer if any. Protected by resource reserved.
* @backup_offset: Offset into the backup buffer if any. Protected by resource
* reserved. Note that only a few resource types can have a @backup_offset
@@ -157,29 +160,32 @@ struct vmw_res_func;
* pin-count greater than zero. It is not on the resource LRU lists and its
* backup buffer is pinned. Hence it can't be evicted.
* @func: Method vtable for this resource. Immutable.
+ * @mob_node: Node for the MOB backup rbtree. Protected by @backup reserved.
* @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
* @binding_head: List head for the context binding list. Protected by
* the @dev_priv::binding_mutex
* @res_free: The resource destructor.
* @hw_destroy: Callback to destroy the resource on the device, as part of
* resource destruction.
*/
+struct vmw_resource_dirty;
struct vmw_resource {
struct kref kref;
struct vmw_private *dev_priv;
int id;
u32 used_prio;
unsigned long backup_size;
- bool res_dirty;
- bool backup_dirty;
+ u32 res_dirty : 1;
+ u32 backup_dirty : 1;
+ u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
const struct vmw_res_func *func;
+ struct rb_node mob_node;
struct list_head lru_head;
- struct list_head mob_head;
struct list_head binding_head;
+ struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
};
@@ -678,7 +684,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
extern struct vmw_resource *
vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+ bool dirtying);
extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -720,6 +727,10 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
void vmw_resource_mob_attach(struct vmw_resource *res);
void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+ pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+ pgoff_t end, pgoff_t *num_prefault);
/**
* vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -729,7 +740,7 @@ void vmw_resource_mob_detach(struct vmw_resource *res);
*/
static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
{
- return !list_empty(&res->mob_head);
+ return !RB_EMPTY_NODE(&res->mob_node);
}
/**
@@ -1407,6 +1418,17 @@ int vmw_host_log(const char *log);
#define VMW_DEBUG_USER(fmt, ...) \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
/**
* VMW_DEBUG_KMS - Debug output for kernel mode-setting
*
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index ff86d49dc5e8..934ad7c0c342 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv,
offsetof(typeof(*cmd), sid));
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
VMW_RES_DIRTY_NONE, user_surface_converter,
&cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index 000000000000..f07aa857587c
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/**************************************************************************
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
+ * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
+ * accesses in the VM mkwrite() callback
+ */
+enum vmw_bo_dirty_method {
+ VMW_BO_DIRTY_PAGETABLE,
+ VMW_BO_DIRTY_MKWRITE,
+};
+
+/*
+ * No dirtied pages at scan trigger a transition to the _MKWRITE method;
+ * similarly, a certain percentage of dirty pages triggers a transition to
+ * the _PAGETABLE method. How many triggers should we wait for before
+ * changing method?
+ */
+#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2
+
+/* Percentage to trigger a transition to the _PAGETABLE method */
+#define VMW_DIRTY_PERCENTAGE 10
+
+/**
+ * struct vmw_bo_dirty - Dirty information for buffer objects
+ * @start: First currently dirty bit
+ * @end: Last currently dirty bit + 1
+ * @method: The currently used dirty method
+ * @change_count: Number of consecutive method change triggers
+ * @ref_count: Reference count for this structure
+ * @bitmap_size: The size of the bitmap in bits. Typically equal to the
+ * nuber of pages in the bo.
+ * @size: The accounting size for this struct.
+ * @bitmap: A bitmap where each bit represents a page. A set bit means a
+ * dirty page.
+ */
+struct vmw_bo_dirty {
+ unsigned long start;
+ unsigned long end;
+ enum vmw_bo_dirty_method method;
+ unsigned int change_count;
+ unsigned int ref_count;
+ unsigned long bitmap_size;
+ size_t size;
+ unsigned long bitmap[0];
+};
+
+/**
+ * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
+ * @vbo: The buffer object to scan
+ *
+ * Scans the pagetable for dirty bits. Clear those bits and modify the
+ * dirty structure with the results. This function may change the
+ * dirty-tracking method.
+ */
+static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t num_marked;
+
+ num_marked = clean_record_shared_mapping_range
+ (mapping,
+ offset, dirty->bitmap_size,
+ offset, &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ if (num_marked == 0)
+ dirty->change_count++;
+ else
+ dirty->change_count = 0;
+
+ if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
+ dirty->change_count = 0;
+ dirty->method = VMW_BO_DIRTY_MKWRITE;
+ wp_shared_mapping_range(mapping,
+ offset, dirty->bitmap_size);
+ clean_record_shared_mapping_range(mapping,
+ offset, dirty->bitmap_size,
+ offset, &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ }
+}
+
+/**
+ * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
+ * @vbo: The buffer object to scan
+ *
+ * Write-protect pages written to so that consecutive write accesses will
+ * trigger a call to mkwrite.
+ *
+ * This function may change the dirty-tracking method.
+ */
+static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t num_marked;
+
+ if (dirty->end <= dirty->start)
+ return;
+
+ num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping,
+ dirty->start + offset,
+ dirty->end - dirty->start);
+
+ if (100UL * num_marked / dirty->bitmap_size >
+ VMW_DIRTY_PERCENTAGE) {
+ dirty->change_count++;
+ } else {
+ dirty->change_count = 0;
+ }
+
+ if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
+ pgoff_t start = 0;
+ pgoff_t end = dirty->bitmap_size;
+
+ dirty->method = VMW_BO_DIRTY_PAGETABLE;
+ clean_record_shared_mapping_range(mapping, offset, end, offset,
+ &dirty->bitmap[0],
+ &start, &end);
+ bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
+ if (dirty->start < dirty->end)
+ bitmap_set(&dirty->bitmap[0], dirty->start,
+ dirty->end - dirty->start);
+ dirty->change_count = 0;
+ }
+}
+
+/**
+ * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
+ * tracking structure
+ * @vbo: The buffer object to scan
+ *
+ * This function may change the dirty tracking method.
+ */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
+ vmw_bo_dirty_scan_pagetable(vbo);
+ else
+ vmw_bo_dirty_scan_mkwrite(vbo);
+}
+
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object.
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+ if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+ return;
+
+ wp_shared_mapping_range(mapping, start + offset, end - start);
+ clean_record_shared_mapping_range(mapping, start + offset,
+ end - start, offset,
+ &dirty->bitmap[0], &dirty->start,
+ &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end)
+{
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+ vmw_bo_dirty_pre_unmap(vbo, start, end);
+ unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
+ (loff_t) (end - start) << PAGE_SHIFT);
+}
+
+/**
+ * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
+ * @vbo: The buffer object
+ *
+ * This function registers a dirty-tracking user to a buffer object.
+ * A user can be, for example, a resource or a vma in a special user-space
+ * mapping.
+ *
+ * Return: Zero on success, -ENOMEM on memory allocation failure.
+ */
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t num_pages = vbo->base.num_pages;
+ size_t size, acc_size;
+ int ret;
+ static struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false
+ };
+
+ if (dirty) {
+ dirty->ref_count++;
+ return 0;
+ }
+
+ size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
+ acc_size = ttm_round_pot(size);
+ ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
+ if (ret) {
+ VMW_DEBUG_USER("Out of graphics memory for buffer object "
+ "dirty tracker.\n");
+ return ret;
+ }
+ dirty = kvzalloc(size, GFP_KERNEL);
+ if (!dirty) {
+ ret = -ENOMEM;
+ goto out_no_dirty;
+ }
+
+ dirty->size = acc_size;
+ dirty->bitmap_size = num_pages;
+ dirty->start = dirty->bitmap_size;
+ dirty->end = 0;
+ dirty->ref_count = 1;
+ if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
+ dirty->method = VMW_BO_DIRTY_PAGETABLE;
+ } else {
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
+
+ dirty->method = VMW_BO_DIRTY_MKWRITE;
+
+ /* Write-protect and then pick up already dirty bits */
+ wp_shared_mapping_range(mapping, offset, num_pages);
+ clean_record_shared_mapping_range(mapping, offset, num_pages,
+ offset,
+ &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ }
+
+ vbo->dirty = dirty;
+
+ return 0;
+
+out_no_dirty:
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
+ return ret;
+}
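
The sizing arithmetic in vmw_bo_dirty_add() allocates one bit per page on top of the tracker header, and starts small objects, whose page-table entries fit in a single page, in pagetable-scan mode. A minimal userspace sketch of that arithmetic follows; PAGE_SIZE and the pte size are hard-coded assumptions purely for the example, and the example_* names are made up.

#include <stdio.h>
#include <stddef.h>

#define EXAMPLE_PAGE_SIZE	4096UL			/* assumed page size */
#define EXAMPLE_PTE_SIZE	8UL			/* assumed sizeof(pte_t) */
#define EXAMPLE_BITS_PER_LONG	(8 * sizeof(long))
#define EXAMPLE_BITS_TO_LONGS(n) \
	(((n) + EXAMPLE_BITS_PER_LONG - 1) / EXAMPLE_BITS_PER_LONG)

struct example_dirty {
	unsigned long start, end;	/* tracked dirty page range */
	unsigned long bitmap[];		/* one bit per page */
};

int main(void)
{
	unsigned long num_pages = 1024;	/* e.g. a 4 MiB buffer object */
	size_t size = sizeof(struct example_dirty) +
		      EXAMPLE_BITS_TO_LONGS(num_pages) * sizeof(long);
	const char *method =
		(num_pages < EXAMPLE_PAGE_SIZE / EXAMPLE_PTE_SIZE) ?
		"pagetable scan" : "mkwrite";

	printf("tracker allocation: %zu bytes, initial method: %s\n",
	       size, method);
	return 0;
}
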
+
+/**
+ * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
+ * @vbo: The buffer object
+ *
+ * This function releases a dirty-tracking user from a buffer object.
+ * If the reference count reaches zero, then the dirty-tracking object is
+ * freed and the pointer to it cleared.
+ *
+ */
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ if (dirty && --dirty->ref_count == 0) {
+ size_t acc_size = dirty->size;
+
+ kvfree(dirty);
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
+ vbo->dirty = NULL;
+ }
+}
+
+/**
+ * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
+ * its backing mob.
+ * @res: The resource
+ *
+ * This function will pick up all dirty ranges affecting the resource from
+ * its backup mob, and call vmw_resource_dirty_update() once for each
+ * range. The transferred ranges will be cleared from the backing mob's
+ * dirty tracking.
+ */
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
+{
+ struct vmw_buffer_object *vbo = res->backup;
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t start, cur, end;
+ unsigned long res_start = res->backup_offset;
+ unsigned long res_end = res->backup_offset + res->backup_size;
+
+ WARN_ON_ONCE(res_start & ~PAGE_MASK);
+ res_start >>= PAGE_SHIFT;
+ res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
+
+ if (res_start >= dirty->end || res_end <= dirty->start)
+ return;
+
+ cur = max(res_start, dirty->start);
+ res_end = max(res_end, dirty->end);
+ while (cur < res_end) {
+ unsigned long num;
+
+ start = find_next_bit(&dirty->bitmap[0], res_end, cur);
+ if (start >= res_end)
+ break;
+
+ end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
+ cur = end + 1;
+ num = end - start;
+ bitmap_clear(&dirty->bitmap[0], start, num);
+ vmw_resource_dirty_update(res, start, end);
+ }
+
+ if (res_start <= dirty->start && res_end > dirty->start)
+ dirty->start = res_end;
+ if (res_start < dirty->end && res_end >= dirty->end)
+ dirty->end = res_start;
+}
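
The transfer loop above walks the bitmap one dirty run at a time via find_next_bit()/find_next_zero_bit(). A stand-alone sketch of the same walk, using a simplified bit-search helper instead of the kernel ones (illustrative only; the example_* names are not kernel APIs):

#include <stdbool.h>
#include <stdio.h>

#define EXAMPLE_BITS_PER_LONG	(8 * sizeof(unsigned long))

/* Simplified stand-in for the kernel's find_next_bit()/find_next_zero_bit(). */
static unsigned long example_find_next(const unsigned long *map,
				       unsigned long size, unsigned long start,
				       bool want_set)
{
	unsigned long i;

	for (i = start; i < size; i++) {
		bool set = (map[i / EXAMPLE_BITS_PER_LONG] >>
			    (i % EXAMPLE_BITS_PER_LONG)) & 1UL;

		if (set == want_set)
			return i;
	}
	return size;
}

int main(void)
{
	/* Pages 2-4 and 9 dirty within a 16-page window. */
	unsigned long bitmap[1] = {
		(1UL << 2) | (1UL << 3) | (1UL << 4) | (1UL << 9)
	};
	unsigned long cur = 0, size = 16;

	while (cur < size) {
		unsigned long start = example_find_next(bitmap, size, cur, true);
		unsigned long end;

		if (start >= size)
			break;
		end = example_find_next(bitmap, size, start + 1, false);
		printf("dirty run: pages [%lu, %lu)\n", start, end);
		cur = end + 1;	/* the bit at 'end' is known to be clear */
	}
	return 0;
}
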
+
+/**
+ * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
+ * its backing mob.
+ * @res: The resource
+ *
+ * This function will clear all dirty ranges affecting the resource from
+ * its backup mob's dirty tracking.
+ */
+void vmw_bo_dirty_clear_res(struct vmw_resource *res)
+{
+ unsigned long res_start = res->backup_offset;
+ unsigned long res_end = res->backup_offset + res->backup_size;
+ struct vmw_buffer_object *vbo = res->backup;
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ res_start >>= PAGE_SHIFT;
+ res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
+
+ if (res_start >= dirty->end || res_end <= dirty->start)
+ return;
+
+ res_start = max(res_start, dirty->start);
+ res_end = min(res_end, dirty->end);
+ bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);
+
+ if (res_start <= dirty->start && res_end > dirty->start)
+ dirty->start = res_end;
+ if (res_start < dirty->end && res_end >= dirty->end)
+ dirty->end = res_start;
+}
+
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+ vma->vm_private_data;
+ vm_fault_t ret;
+ unsigned long page_offset;
+ unsigned int save_flags;
+ struct vmw_buffer_object *vbo =
+ container_of(bo, typeof(*vbo), base);
+
+ /*
+ * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly.
+ * So make sure the TTM helpers are aware.
+ */
+ save_flags = vmf->flags;
+ vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ vmf->flags = save_flags;
+ if (ret)
+ return ret;
+
+ page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
+ if (unlikely(page_offset >= bo->num_pages)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
+ if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
+ !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ __set_bit(page_offset, &dirty->bitmap[0]);
+ dirty->start = min(dirty->start, page_offset);
+ dirty->end = max(dirty->end, page_offset + 1);
+ }
+
+out_unlock:
+ dma_resv_unlock(bo->base.resv);
+ return ret;
+}
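
The dirty bookkeeping in vmw_bo_vm_mkwrite() reduces to setting the page's bit and keeping the [start, end) hint tight around everything marked so far; start == bitmap_size and end == 0 denote an empty range, as initialized in vmw_bo_dirty_add(). A minimal userspace sketch of that record step (the example_* names are made up for illustration):

#include <stdio.h>

#define EXAMPLE_BITS_PER_LONG	(8 * sizeof(unsigned long))

struct example_dirty {
	unsigned long start, end;	/* tight hint around set bits */
	unsigned long bitmap[1];	/* one bit per page, 64 pages here */
};

/* Record one written page: set its bit and widen the [start, end) hint. */
static void example_mark_dirty(struct example_dirty *d, unsigned long page)
{
	d->bitmap[page / EXAMPLE_BITS_PER_LONG] |=
		1UL << (page % EXAMPLE_BITS_PER_LONG);
	if (page < d->start)
		d->start = page;
	if (page + 1 > d->end)
		d->end = page + 1;
}

int main(void)
{
	/* start == bitmap_size and end == 0 mean "nothing dirty yet". */
	struct example_dirty d = { .start = 64, .end = 0, .bitmap = { 0 } };

	example_mark_dirty(&d, 17);
	example_mark_dirty(&d, 3);
	printf("dirty hint: [%lu, %lu)\n", d.start, d.end);	/* [3, 18) */
	return 0;
}
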
+
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+ vma->vm_private_data;
+ struct vmw_buffer_object *vbo =
+ container_of(bo, struct vmw_buffer_object, base);
+ pgoff_t num_prefault;
+ pgprot_t prot;
+ vm_fault_t ret;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
+ TTM_BO_VM_NUM_PREFAULT;
+
+ if (vbo->dirty) {
+ pgoff_t allowed_prefault;
+ unsigned long page_offset;
+
+ page_offset = vmf->pgoff -
+ drm_vma_node_start(&bo->base.vma_node);
+ if (page_offset >= bo->num_pages ||
+ vmw_resources_clean(vbo, page_offset,
+ page_offset + PAGE_SIZE,
+ &allowed_prefault)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
+ num_prefault = min(num_prefault, allowed_prefault);
+ }
+
+ /*
+	 * If we don't track dirty using the MKWRITE method, make sure
+	 * the page protection is write-enabled so we don't get a lot of
+	 * unnecessary write faults.
+ */
+ if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
+ prot = vma->vm_page_prot;
+ else
+ prot = vm_get_page_prot(vma->vm_flags);
+
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+out_unlock:
+ dma_resv_unlock(bo->base.resv);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 6dfe36fb817c..c8441030637a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -40,11 +40,24 @@
void vmw_resource_mob_attach(struct vmw_resource *res)
{
struct vmw_buffer_object *backup = res->backup;
+ struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
dma_resv_assert_held(res->backup->base.base.resv);
res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
res->func->prio;
- list_add_tail(&res->mob_head, &backup->res_list);
+
+ while (*new) {
+ struct vmw_resource *this =
+ container_of(*new, struct vmw_resource, mob_node);
+
+ parent = *new;
+ new = (res->backup_offset < this->backup_offset) ?
+ &((*new)->rb_left) : &((*new)->rb_right);
+ }
+
+ rb_link_node(&res->mob_node, parent, new);
+ rb_insert_color(&res->mob_node, &backup->res_tree);
+
vmw_bo_prio_add(backup, res->used_prio);
}
@@ -58,7 +71,8 @@ void vmw_resource_mob_detach(struct vmw_resource *res)
dma_resv_assert_held(backup->base.base.resv);
if (vmw_resource_mob_attached(res)) {
- list_del_init(&res->mob_head);
+ rb_erase(&res->mob_node, &backup->res_tree);
+ RB_CLEAR_NODE(&res->mob_node);
vmw_bo_prio_del(backup, res->used_prio);
}
}
@@ -119,6 +133,10 @@ static void vmw_resource_release(struct kref *kref)
}
res->backup_dirty = false;
vmw_resource_mob_detach(res);
+ if (res->dirty)
+ res->func->dirty_free(res);
+ if (res->coherent)
+ vmw_bo_dirty_release(res->backup);
ttm_bo_unreserve(bo);
vmw_bo_unreference(&res->backup);
}
@@ -200,15 +218,17 @@ int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res,
res->res_free = res_free;
res->dev_priv = dev_priv;
res->func = func;
+ RB_CLEAR_NODE(&res->mob_node);
INIT_LIST_HEAD(&res->lru_head);
- INIT_LIST_HEAD(&res->mob_head);
INIT_LIST_HEAD(&res->binding_head);
res->id = -1;
res->backup = NULL;
res->backup_offset = 0;
res->backup_dirty = false;
res->res_dirty = false;
+ res->coherent = false;
res->used_prio = 3;
+ res->dirty = NULL;
if (delay_id)
return 0;
else
@@ -373,7 +393,8 @@ out_no_bo:
* should be retried once resources have been freed up.
*/
static int vmw_resource_do_validate(struct vmw_resource *res,
- struct ttm_validate_buffer *val_buf)
+ struct ttm_validate_buffer *val_buf,
+ bool dirtying)
{
int ret = 0;
const struct vmw_res_func *func = res->func;
@@ -395,6 +416,39 @@ static int vmw_resource_do_validate(struct vmw_resource *res,
vmw_resource_mob_attach(res);
}
+ /*
+ * Handle the case where the backup mob is marked coherent but
+ * the resource isn't.
+ */
+ if (func->dirty_alloc && vmw_resource_mob_attached(res) &&
+ !res->coherent) {
+ if (res->backup->dirty && !res->dirty) {
+ ret = func->dirty_alloc(res);
+ if (ret)
+ return ret;
+ } else if (!res->backup->dirty && res->dirty) {
+ func->dirty_free(res);
+ }
+ }
+
+ /*
+ * Transfer the dirty regions to the resource and update
+ * the resource.
+ */
+ if (res->dirty) {
+ if (dirtying && !res->res_dirty) {
+ pgoff_t start = res->backup_offset >> PAGE_SHIFT;
+ pgoff_t end = __KERNEL_DIV_ROUND_UP
+ (res->backup_offset + res->backup_size,
+ PAGE_SIZE);
+
+ vmw_bo_dirty_unmap(res->backup, start, end);
+ }
+
+ vmw_bo_dirty_transfer_to_res(res);
+ return func->dirty_sync(res);
+ }
+
return 0;
out_bind_failed:
@@ -433,16 +487,28 @@ void vmw_resource_unreserve(struct vmw_resource *res,
if (switch_backup && new_backup != res->backup) {
if (res->backup) {
vmw_resource_mob_detach(res);
+ if (res->coherent)
+ vmw_bo_dirty_release(res->backup);
vmw_bo_unreference(&res->backup);
}
if (new_backup) {
res->backup = vmw_bo_reference(new_backup);
+
+ /*
+ * The validation code should already have added a
+ * dirty tracker here.
+ */
+ WARN_ON(res->coherent && !new_backup->dirty);
+
vmw_resource_mob_attach(res);
} else {
res->backup = NULL;
}
+ } else if (switch_backup && res->coherent) {
+ vmw_bo_dirty_release(res->backup);
}
+
if (switch_backup)
res->backup_offset = new_backup_offset;
@@ -622,6 +688,7 @@ out_no_unbind:
* to the device.
* @res: The resource to make visible to the device.
* @intr: Perform waits interruptible if possible.
+ * @dirtying: Pending GPU operation will dirty the resource
*
 * On successful return, any backup DMA buffer pointed to by @res->backup will
* be reserved and validated.
@@ -631,7 +698,8 @@ out_no_unbind:
* Return: Zero on success, -ERESTARTSYS if interrupted, negative error code
* on failure.
*/
-int vmw_resource_validate(struct vmw_resource *res, bool intr)
+int vmw_resource_validate(struct vmw_resource *res, bool intr,
+ bool dirtying)
{
int ret;
struct vmw_resource *evict_res;
@@ -648,7 +716,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr)
if (res->backup)
val_buf.bo = &res->backup->base;
do {
- ret = vmw_resource_do_validate(res, &val_buf);
+ ret = vmw_resource_do_validate(res, &val_buf, dirtying);
if (likely(ret != -EBUSY))
break;
@@ -711,19 +779,20 @@ out_no_validate:
*/
void vmw_resource_unbind_list(struct vmw_buffer_object *vbo)
{
-
- struct vmw_resource *res, *next;
struct ttm_validate_buffer val_buf = {
.bo = &vbo->base,
.num_shared = 0
};
dma_resv_assert_held(vbo->base.base.resv);
- list_for_each_entry_safe(res, next, &vbo->res_list, mob_head) {
- if (!res->func->unbind)
- continue;
+ while (!RB_EMPTY_ROOT(&vbo->res_tree)) {
+ struct rb_node *node = vbo->res_tree.rb_node;
+ struct vmw_resource *res =
+ container_of(node, struct vmw_resource, mob_node);
+
+ if (!WARN_ON_ONCE(!res->func->unbind))
+ (void) res->func->unbind(res, res->res_dirty, &val_buf);
- (void) res->func->unbind(res, res->res_dirty, &val_buf);
res->backup_dirty = true;
res->res_dirty = false;
vmw_resource_mob_detach(res);
@@ -947,7 +1016,7 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible)
/* Do we really need to pin the MOB as well? */
vmw_bo_pin_reserved(vbo, true);
}
- ret = vmw_resource_validate(res, interruptible);
+ ret = vmw_resource_validate(res, interruptible, true);
if (vbo)
ttm_bo_unreserve(&vbo->base);
if (ret)
@@ -1007,3 +1076,101 @@ enum vmw_res_type vmw_res_type(const struct vmw_resource *res)
{
return res->func->res_type;
}
+
+/**
+ * vmw_resource_dirty_update - Update a resource's dirty tracker with a
+ * sequential range of touched backing store memory.
+ * @res: The resource.
+ * @start: The first page touched.
+ * @end: The last page touched + 1.
+ */
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+ pgoff_t end)
+{
+ if (res->dirty)
+ res->func->dirty_range_add(res, start << PAGE_SHIFT,
+ end << PAGE_SHIFT);
+}
+
+/**
+ * vmw_resources_clean - Clean resources intersecting a mob range
+ * @vbo: The mob buffer object
+ * @start: The mob page offset starting the range
+ * @end: The mob page offset ending the range
+ * @num_prefault: Returns how many pages including the first have been
+ * cleaned and are OK to prefault.
+ */
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+ pgoff_t end, pgoff_t *num_prefault)
+{
+ struct rb_node *cur = vbo->res_tree.rb_node;
+ struct vmw_resource *found = NULL;
+ unsigned long res_start = start << PAGE_SHIFT;
+ unsigned long res_end = end << PAGE_SHIFT;
+ unsigned long last_cleaned = 0;
+
+ /*
+	 * Find the resource with the lowest backup_offset that intersects the
+ * range.
+ */
+ while (cur) {
+ struct vmw_resource *cur_res =
+ container_of(cur, struct vmw_resource, mob_node);
+
+ if (cur_res->backup_offset >= res_end) {
+ cur = cur->rb_left;
+ } else if (cur_res->backup_offset + cur_res->backup_size <=
+ res_start) {
+ cur = cur->rb_right;
+ } else {
+ found = cur_res;
+ cur = cur->rb_left;
+ /* Continue to look for resources with lower offsets */
+ }
+ }
+
+ /*
+	 * In order of increasing backup_offset, clean dirty resources
+ * intersecting the range.
+ */
+ while (found) {
+ if (found->res_dirty) {
+ int ret;
+
+ if (!found->func->clean)
+ return -EINVAL;
+
+ ret = found->func->clean(found);
+ if (ret)
+ return ret;
+
+ found->res_dirty = false;
+ }
+ last_cleaned = found->backup_offset + found->backup_size;
+ cur = rb_next(&found->mob_node);
+ if (!cur)
+ break;
+
+ found = container_of(cur, struct vmw_resource, mob_node);
+ if (found->backup_offset >= res_end)
+ break;
+ }
+
+ /*
+	 * Set the number of pages allowed for prefaulting, and fence the
+	 * buffer object.
+ */
+ *num_prefault = 1;
+ if (last_cleaned > res_start) {
+ struct ttm_buffer_object *bo = &vbo->base;
+
+ *num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
+ PAGE_SIZE);
+ vmw_bo_fence_single(bo, NULL);
+ if (bo->moving)
+ dma_fence_put(bo->moving);
+ bo->moving = dma_fence_get
+ (dma_resv_get_excl(bo->base.resv));
+ }
+
+ return 0;
+}
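
The descend loop at the top of vmw_resources_clean() finds the intersecting resource with the lowest backup_offset by continuing to the left child after every hit. The same idea on a plain binary search tree, as a stand-alone sketch rather than the kernel rb-tree API (all example_* names are invented for the illustration):

#include <stdio.h>
#include <stddef.h>

/* Plain BST node keyed on offset, standing in for resources ordered by
 * backup_offset in the buffer object's res_tree. */
struct example_node {
	unsigned long offset, size;
	struct example_node *left, *right;
};

/* Return the node intersecting [start, end) with the lowest offset. */
static struct example_node *example_lowest_intersect(struct example_node *cur,
						     unsigned long start,
						     unsigned long end)
{
	struct example_node *found = NULL;

	while (cur) {
		if (cur->offset >= end) {
			cur = cur->left;
		} else if (cur->offset + cur->size <= start) {
			cur = cur->right;
		} else {
			found = cur;
			/* Keep looking for intersections at lower offsets. */
			cur = cur->left;
		}
	}
	return found;
}

int main(void)
{
	struct example_node a = { 0, 4096, NULL, NULL };
	struct example_node c = { 8192, 4096, NULL, NULL };
	struct example_node b = { 4096, 4096, &a, &c };	/* tree root */
	struct example_node *hit = example_lowest_intersect(&b, 5000, 9000);

	printf("lowest intersecting offset: %lu\n",
	       hit ? hit->offset : 0UL);	/* prints 4096 */
	return 0;
}
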
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
index 984e588c62ca..3b7438b2d289 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
@@ -71,6 +71,13 @@ struct vmw_user_resource_conv {
* @commit_notify: If the resource is a command buffer managed resource,
* callback to notify that a define or remove command
* has been committed to the device.
+ * @dirty_alloc: Allocate a dirty tracker. NULL if dirty-tracking is not
+ * supported.
+ * @dirty_free: Free the dirty tracker.
+ * @dirty_sync: Upload the dirty mob contents to the resource.
+ * @dirty_range_add: Add a sequential dirty range to the resource
+ * dirty tracker.
+ * @clean: Clean the resource.
*/
struct vmw_res_func {
enum vmw_res_type res_type;
@@ -90,6 +97,12 @@ struct vmw_res_func {
struct ttm_validate_buffer *val_buf);
void (*commit_notify)(struct vmw_resource *res,
enum vmw_cmdbuf_res_state state);
+ int (*dirty_alloc)(struct vmw_resource *res);
+ void (*dirty_free)(struct vmw_resource *res);
+ int (*dirty_sync)(struct vmw_resource *res);
+ void (*dirty_range_add)(struct vmw_resource *res, size_t start,
+ size_t end);
+ int (*clean)(struct vmw_resource *res);
};
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index de0530b4dc1b..32b9131b2bae 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -68,6 +68,20 @@ struct vmw_surface_offset {
uint32_t bo_offset;
};
+/**
+ * vmw_surface_dirty - Surface dirty-tracker
+ * @cache: Cached layout information of the surface.
+ * @size: Accounting size for the struct vmw_surface_dirty.
+ * @num_subres: Number of subresources.
+ * @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource.
+ */
+struct vmw_surface_dirty {
+ struct svga3dsurface_cache cache;
+ size_t size;
+ u32 num_subres;
+ SVGA3dBox boxes[0];
+};
+
static void vmw_user_surface_free(struct vmw_resource *res);
static struct vmw_resource *
vmw_user_surface_base_to_res(struct ttm_base_object *base);
@@ -96,6 +110,13 @@ vmw_gb_surface_reference_internal(struct drm_device *dev,
struct drm_vmw_gb_surface_ref_ext_rep *rep,
struct drm_file *file_priv);
+static void vmw_surface_dirty_free(struct vmw_resource *res);
+static int vmw_surface_dirty_alloc(struct vmw_resource *res);
+static int vmw_surface_dirty_sync(struct vmw_resource *res);
+static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
+ size_t end);
+static int vmw_surface_clean(struct vmw_resource *res);
+
static const struct vmw_user_resource_conv user_surface_conv = {
.object_type = VMW_RES_SURFACE,
.base_obj_to_res = vmw_user_surface_base_to_res,
@@ -133,7 +154,12 @@ static const struct vmw_res_func vmw_gb_surface_func = {
.create = vmw_gb_surface_create,
.destroy = vmw_gb_surface_destroy,
.bind = vmw_gb_surface_bind,
- .unbind = vmw_gb_surface_unbind
+ .unbind = vmw_gb_surface_unbind,
+ .dirty_alloc = vmw_surface_dirty_alloc,
+ .dirty_free = vmw_surface_dirty_free,
+ .dirty_sync = vmw_surface_dirty_sync,
+ .dirty_range_add = vmw_surface_dirty_range_add,
+ .clean = vmw_surface_clean,
};
/**
@@ -639,6 +665,7 @@ static void vmw_user_surface_free(struct vmw_resource *res)
struct vmw_private *dev_priv = srf->res.dev_priv;
uint32_t size = user_srf->size;
+ WARN_ON_ONCE(res->dirty);
if (user_srf->master)
drm_master_put(&user_srf->master);
kfree(srf->offsets);
@@ -1166,10 +1193,16 @@ static int vmw_gb_surface_bind(struct vmw_resource *res,
cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_SURFACE;
cmd2->header.size = sizeof(cmd2->body);
cmd2->body.sid = res->id;
- res->backup_dirty = false;
}
vmw_fifo_commit(dev_priv, submit_size);
+ if (res->backup->dirty && res->backup_dirty) {
+		/* We've just made a full upload. Clear dirty regions. */
+ vmw_bo_dirty_clear_res(res);
+ }
+
+ res->backup_dirty = false;
+
return 0;
}
@@ -1634,7 +1667,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
}
}
} else if (req->base.drm_surface_flags &
- drm_vmw_surface_flag_create_buffer)
+ (drm_vmw_surface_flag_create_buffer |
+ drm_vmw_surface_flag_coherent))
ret = vmw_user_bo_alloc(dev_priv, tfile,
res->backup_size,
req->base.drm_surface_flags &
@@ -1648,6 +1682,26 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
goto out_unlock;
}
+ if (req->base.drm_surface_flags & drm_vmw_surface_flag_coherent) {
+ struct vmw_buffer_object *backup = res->backup;
+
+ ttm_bo_reserve(&backup->base, false, false, NULL);
+ if (!res->func->dirty_alloc)
+ ret = -EINVAL;
+ if (!ret)
+ ret = vmw_bo_dirty_add(backup);
+ if (!ret) {
+ res->coherent = true;
+ ret = res->func->dirty_alloc(res);
+ }
+ ttm_bo_unreserve(&backup->base);
+ if (ret) {
+ vmw_resource_unreference(&res);
+ goto out_unlock;
+ }
+
+ }
+
tmp = vmw_resource_reference(res);
ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime,
req->base.drm_surface_flags &
@@ -1756,3 +1810,338 @@ out_bad_resource:
return ret;
}
+
+/**
+ * vmw_subres_dirty_add - Add a dirty region to a subresource
+ * @dirty: The surface's dirty tracker.
+ * @loc_start: The location corresponding to the start of the region.
+ * @loc_end: The location corresponding to the end of the region.
+ *
+ * Since @loc_start and @loc_end are assumed to represent a sequential
+ * range of backing store memory, if the region spans multiple lines then,
+ * regardless of the x coordinate, the full lines are dirtied.
+ * Correspondingly, if the region spans multiple z slices, full rather
+ * than partial z slices are dirtied.
+ */
+static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty,
+ const struct svga3dsurface_loc *loc_start,
+ const struct svga3dsurface_loc *loc_end)
+{
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ SVGA3dBox *box = &dirty->boxes[loc_start->sub_resource];
+ u32 mip = loc_start->sub_resource % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+ u32 box_c2 = box->z + box->d;
+
+ if (WARN_ON(loc_start->sub_resource >= dirty->num_subres))
+ return;
+
+ if (box->d == 0 || box->z > loc_start->z)
+ box->z = loc_start->z;
+ if (box_c2 < loc_end->z)
+ box->d = loc_end->z - box->z;
+
+ if (loc_start->z + 1 == loc_end->z) {
+ box_c2 = box->y + box->h;
+ if (box->h == 0 || box->y > loc_start->y)
+ box->y = loc_start->y;
+ if (box_c2 < loc_end->y)
+ box->h = loc_end->y - box->y;
+
+ if (loc_start->y + 1 == loc_end->y) {
+ box_c2 = box->x + box->w;
+ if (box->w == 0 || box->x > loc_start->x)
+ box->x = loc_start->x;
+ if (box_c2 < loc_end->x)
+ box->w = loc_end->x - box->x;
+ } else {
+ box->x = 0;
+ box->w = size->width;
+ }
+ } else {
+ box->y = 0;
+ box->h = size->height;
+ box->x = 0;
+ box->w = size->width;
+ }
+}
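
The per-axis updates in vmw_subres_dirty_add() grow an SVGA3dBox so that it also covers the newly dirtied region. In one dimension the operation is essentially a span union; the stand-alone sketch below shows that simplified 1-D model (it is not the driver's exact per-axis arithmetic, and the example_* names are placeholders):

#include <stdio.h>

/* A 1-D span: start x and width w; w == 0 means "empty". */
struct example_span {
	unsigned int x, w;
};

/* Grow @s so it also covers [start, end). */
static void example_span_include(struct example_span *s,
				 unsigned int start, unsigned int end)
{
	unsigned int old_end = s->x + s->w;

	if (s->w == 0) {
		s->x = start;
		s->w = end - start;
		return;
	}
	if (start < s->x)
		s->x = start;
	if (end > old_end)
		old_end = end;
	s->w = old_end - s->x;
}

int main(void)
{
	struct example_span s = { 0, 0 };

	example_span_include(&s, 16, 32);
	example_span_include(&s, 8, 20);
	printf("span covers [%u, %u)\n", s.x, s.x + s.w);	/* [8, 32) */
	return 0;
}
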
+
+/**
+ * vmw_subres_dirty_full - Mark a full subresource as dirty
+ * @dirty: The surface's dirty tracker.
+ * @subres: The subresource
+ */
+static void vmw_subres_dirty_full(struct vmw_surface_dirty *dirty, u32 subres)
+{
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ u32 mip = subres % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+ SVGA3dBox *box = &dirty->boxes[subres];
+
+ box->x = 0;
+ box->y = 0;
+ box->z = 0;
+ box->w = size->width;
+ box->h = size->height;
+ box->d = size->depth;
+}
+
+/*
+ * vmw_surface_tex_dirty_range_add - The dirty_range_add callback for texture
+ * surfaces.
+ */
+static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res,
+ size_t start, size_t end)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t backup_end = res->backup_offset + res->backup_size;
+ struct svga3dsurface_loc loc1, loc2;
+ const struct svga3dsurface_cache *cache;
+
+ start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
+ end = min(end, backup_end) - res->backup_offset;
+ cache = &dirty->cache;
+ svga3dsurface_get_loc(cache, &loc1, start);
+ svga3dsurface_get_loc(cache, &loc2, end - 1);
+ svga3dsurface_inc_loc(cache, &loc2);
+
+ if (loc1.sub_resource + 1 == loc2.sub_resource) {
+ /* Dirty range covers a single sub-resource */
+ vmw_subres_dirty_add(dirty, &loc1, &loc2);
+ } else {
+ /* Dirty range covers multiple sub-resources */
+ struct svga3dsurface_loc loc_min, loc_max;
+ u32 sub_res;
+
+ svga3dsurface_max_loc(cache, loc1.sub_resource, &loc_max);
+ vmw_subres_dirty_add(dirty, &loc1, &loc_max);
+ svga3dsurface_min_loc(cache, loc2.sub_resource - 1, &loc_min);
+ vmw_subres_dirty_add(dirty, &loc_min, &loc2);
+ for (sub_res = loc1.sub_resource + 1;
+ sub_res < loc2.sub_resource - 1; ++sub_res)
+ vmw_subres_dirty_full(dirty, sub_res);
+ }
+}
+
+/*
+ * vmw_surface_buf_dirty_range_add - The dirty_range_add callback for buffer
+ * surfaces.
+ */
+static void vmw_surface_buf_dirty_range_add(struct vmw_resource *res,
+ size_t start, size_t end)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ size_t backup_end = res->backup_offset + cache->mip_chain_bytes;
+ SVGA3dBox *box = &dirty->boxes[0];
+ u32 box_c2;
+
+ box->h = box->d = 1;
+ start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
+ end = min(end, backup_end) - res->backup_offset;
+ box_c2 = box->x + box->w;
+ if (box->w == 0 || box->x > start)
+ box->x = start;
+ if (box_c2 < end)
+ box->w = end - box->x;
+}
+
+/*
+ * vmw_surface_dirty_range_add - The dirty_range_add callback for surfaces
+ */
+static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
+ size_t end)
+{
+ struct vmw_surface *srf = vmw_res_to_srf(res);
+
+ if (WARN_ON(end <= res->backup_offset ||
+ start >= res->backup_offset + res->backup_size))
+ return;
+
+ if (srf->format == SVGA3D_BUFFER)
+ vmw_surface_buf_dirty_range_add(res, start, end);
+ else
+ vmw_surface_tex_dirty_range_add(res, start, end);
+}
+
+/*
+ * vmw_surface_dirty_sync - The surface's dirty_sync callback.
+ */
+static int vmw_surface_dirty_sync(struct vmw_resource *res)
+{
+ struct vmw_private *dev_priv = res->dev_priv;
+	bool has_dx = false;
+ u32 i, num_dirty;
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t alloc_size;
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdDXUpdateSubResource body;
+ } *cmd1;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdUpdateGBImage body;
+ } *cmd2;
+ void *cmd;
+
+ num_dirty = 0;
+ for (i = 0; i < dirty->num_subres; ++i) {
+ const SVGA3dBox *box = &dirty->boxes[i];
+
+ if (box->d)
+ num_dirty++;
+ }
+
+ if (!num_dirty)
+ goto out;
+
+ alloc_size = num_dirty * ((has_dx) ? sizeof(*cmd1) : sizeof(*cmd2));
+ cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd1 = cmd;
+ cmd2 = cmd;
+
+ for (i = 0; i < dirty->num_subres; ++i) {
+ const SVGA3dBox *box = &dirty->boxes[i];
+
+ if (!box->d)
+ continue;
+
+ /*
+ * DX_UPDATE_SUBRESOURCE is aware of array surfaces.
+ * UPDATE_GB_IMAGE is not.
+ */
+ if (has_dx) {
+ cmd1->header.id = SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE;
+ cmd1->header.size = sizeof(cmd1->body);
+ cmd1->body.sid = res->id;
+ cmd1->body.subResource = i;
+ cmd1->body.box = *box;
+ cmd1++;
+ } else {
+ cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+ cmd2->header.size = sizeof(cmd2->body);
+ cmd2->body.image.sid = res->id;
+ cmd2->body.image.face = i / cache->num_mip_levels;
+ cmd2->body.image.mipmap = i -
+ (cache->num_mip_levels * cmd2->body.image.face);
+ cmd2->body.box = *box;
+ cmd2++;
+ }
+
+ }
+ vmw_fifo_commit(dev_priv, alloc_size);
+ out:
+ memset(&dirty->boxes[0], 0, sizeof(dirty->boxes[0]) *
+ dirty->num_subres);
+
+ return 0;
+}
+
+/*
+ * vmw_surface_dirty_alloc - The surface's dirty_alloc callback.
+ */
+static int vmw_surface_dirty_alloc(struct vmw_resource *res)
+{
+ struct vmw_surface *srf = vmw_res_to_srf(res);
+ struct vmw_surface_dirty *dirty;
+ u32 num_layers = 1;
+ u32 num_mip;
+ u32 num_subres;
+ u32 num_samples;
+ size_t dirty_size, acc_size;
+ static struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false
+ };
+ int ret;
+
+ if (srf->array_size)
+ num_layers = srf->array_size;
+ else if (srf->flags & SVGA3D_SURFACE_CUBEMAP)
+ num_layers *= SVGA3D_MAX_SURFACE_FACES;
+
+ num_mip = srf->mip_levels[0];
+ if (!num_mip)
+ num_mip = 1;
+
+ num_subres = num_layers * num_mip;
+ dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]);
+ acc_size = ttm_round_pot(dirty_size);
+ ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv),
+ acc_size, &ctx);
+ if (ret) {
+ VMW_DEBUG_USER("Out of graphics memory for surface "
+ "dirty tracker.\n");
+ return ret;
+ }
+
+ dirty = kvzalloc(dirty_size, GFP_KERNEL);
+ if (!dirty) {
+ ret = -ENOMEM;
+ goto out_no_dirty;
+ }
+
+ num_samples = max_t(u32, 1, srf->multisample_count);
+ ret = svga3dsurface_setup_cache(&srf->base_size, srf->format, num_mip,
+ num_layers, num_samples, &dirty->cache);
+ if (ret)
+ goto out_no_cache;
+
+ dirty->num_subres = num_subres;
+ dirty->size = acc_size;
+ res->dirty = (struct vmw_resource_dirty *) dirty;
+
+ return 0;
+
+out_no_cache:
+ kvfree(dirty);
+out_no_dirty:
+ ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
+ return ret;
+}
+
+/*
+ * vmw_surface_dirty_free - The surface's dirty_free callback
+ */
+static void vmw_surface_dirty_free(struct vmw_resource *res)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t acc_size = dirty->size;
+
+ kvfree(dirty);
+ ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
+ res->dirty = NULL;
+}
+
+/*
+ * vmw_surface_clean - The surface's clean callback
+ */
+static int vmw_surface_clean(struct vmw_resource *res)
+{
+ struct vmw_private *dev_priv = res->dev_priv;
+ size_t alloc_size;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdReadbackGBSurface body;
+ } *cmd;
+
+ alloc_size = sizeof(*cmd);
+ cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->header.id = SVGA_3D_CMD_READBACK_GB_SURFACE;
+ cmd->header.size = sizeof(cmd->body);
+ cmd->body.sid = res->id;
+ vmw_fifo_commit(dev_priv, alloc_size);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index 5a7b8bb420de..ce288756531b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -29,10 +29,23 @@
int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
{
+ static const struct vm_operations_struct vmw_vm_ops = {
+ .pfn_mkwrite = vmw_bo_vm_mkwrite,
+ .page_mkwrite = vmw_bo_vm_mkwrite,
+ .fault = vmw_bo_vm_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close
+ };
struct drm_file *file_priv = filp->private_data;
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
+ int ret = ttm_bo_mmap(filp, vma, &dev_priv->bdev);
- return ttm_bo_mmap(filp, vma, &dev_priv->bdev);
+ if (ret)
+ return ret;
+
+ vma->vm_ops = &vmw_vm_ops;
+
+ return 0;
}
/* struct vmw_validation_mem callback */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
index 7bff3628fc54..e69bc373ae2e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
@@ -33,6 +33,8 @@
* struct vmw_validation_bo_node - Buffer object validation metadata.
* @base: Metadata used for TTM reservation- and validation.
* @hash: A hash entry used for the duplicate detection hash table.
+ * @coherent_count: If switching backup buffers, number of new coherent
+ * resources that will have this buffer as a backup buffer.
* @as_mob: Validate as mob.
* @cpu_blit: Validate for cpu blit access.
*
@@ -42,6 +44,7 @@
struct vmw_validation_bo_node {
struct ttm_validate_buffer base;
struct drm_hash_item hash;
+ unsigned int coherent_count;
u32 as_mob : 1;
u32 cpu_blit : 1;
};
@@ -459,6 +462,19 @@ int vmw_validation_res_reserve(struct vmw_validation_context *ctx,
if (ret)
goto out_unreserve;
}
+
+ if (val->switching_backup && val->new_backup &&
+ res->coherent) {
+ struct vmw_validation_bo_node *bo_node =
+ vmw_validation_find_bo_dup(ctx,
+ val->new_backup);
+
+ if (WARN_ON(!bo_node)) {
+ ret = -EINVAL;
+ goto out_unreserve;
+ }
+ bo_node->coherent_count++;
+ }
}
return 0;
@@ -565,6 +581,9 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
int ret;
list_for_each_entry(entry, &ctx->bo_list, base.head) {
+ struct vmw_buffer_object *vbo =
+ container_of(entry->base.bo, typeof(*vbo), base);
+
if (entry->cpu_blit) {
struct ttm_operation_ctx ctx = {
.interruptible = intr,
@@ -579,6 +598,27 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
}
if (ret)
return ret;
+
+ /*
+		 * Rather than having the resource code allocate the bo
+		 * dirty tracker in resource_unreserve(), where we can't fail,
+		 * do it here when validating the buffer object.
+ */
+ if (entry->coherent_count) {
+ unsigned int coherent_count = entry->coherent_count;
+
+ while (coherent_count) {
+ ret = vmw_bo_dirty_add(vbo);
+ if (ret)
+ return ret;
+
+ coherent_count--;
+ }
+ entry->coherent_count -= coherent_count;
+ }
+
+ if (vbo->dirty)
+ vmw_bo_dirty_scan(vbo);
}
return 0;
}
@@ -604,7 +644,8 @@ int vmw_validation_res_validate(struct vmw_validation_context *ctx, bool intr)
struct vmw_resource *res = val->res;
struct vmw_buffer_object *backup = res->backup;
- ret = vmw_resource_validate(res, intr);
+ ret = vmw_resource_validate(res, intr, val->dirty_set &&
+ val->dirty);
if (ret) {
if (ret != -ERESTARTSYS)
DRM_ERROR("Failed to validate resource.\n");
@@ -831,3 +872,34 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
ctx->mem_size_left += size;
return 0;
}
+
+/**
+ * vmw_validation_bo_backoff - Unreserve buffer objects registered with a
+ * validation context
+ * @ctx: The validation context
+ *
+ * This function unreserves the buffer objects previously reserved using
+ * vmw_validation_bo_reserve. It's typically used as part of an error path.
+ */
+void vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
+{
+ struct vmw_validation_bo_node *entry;
+
+ /*
+ * Switching coherent resource backup buffers failed.
+ * Release corresponding buffer object dirty trackers.
+ */
+ list_for_each_entry(entry, &ctx->bo_list, base.head) {
+ if (entry->coherent_count) {
+ unsigned int coherent_count = entry->coherent_count;
+ struct vmw_buffer_object *vbo =
+ container_of(entry->base.bo, typeof(*vbo),
+ base);
+
+ while (coherent_count--)
+ vmw_bo_dirty_release(vbo);
+ }
+ }
+
+ ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index 71ce4b318850..739906d1b3eb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -174,20 +174,6 @@ vmw_validation_bo_reserve(struct vmw_validation_context *ctx,
}
/**
- * vmw_validation_bo_backoff - Unreserve buffer objects registered with a
- * validation context
- * @ctx: The validation context
- *
- * This function unreserves the buffer objects previously reserved using
- * vmw_validation_bo_reserve. It's typically used as part of an error path
- */
-static inline void
-vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
-{
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
-}
-
-/**
* vmw_validation_bo_fence - Unreserve and fence buffer objects registered
* with a validation context
* @ctx: The validation context
@@ -269,4 +255,6 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
unsigned int size);
void vmw_validation_res_set_dirty(struct vmw_validation_context *ctx,
void *val_private, u32 dirty);
+void vmw_validation_bo_backoff(struct vmw_validation_context *ctx);
+
#endif