From 204129a211fc486164c25e6082941e4fe3ba889d Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Wed, 5 Jan 2022 00:35:50 +0200 Subject: drm/i915: Use to_gt() helper for GGTT accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GGTT is currently available both through i915->ggtt and gt->ggtt, and we eventually want to get rid of the i915->ggtt one. Use to_gt() for all i915->ggtt accesses to help with the future refactoring. Signed-off-by: Michał Winiarski Cc: Michal Wajdeczko Signed-off-by: Andi Shyti Reviewed-by: Matt Roper Signed-off-by: Matt Roper Link: https://patchwork.freedesktop.org/patch/msgid/20220104223550.56135-1-andi.shyti@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 915bf431f320..e3730096abd9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -88,7 +88,8 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct i915_ggtt *ggtt = &to_i915(dev)->ggtt; + struct drm_i915_private *i915 = to_i915(dev); + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct drm_i915_gem_get_aperture *args = data; struct i915_vma *vma; u64 pinned; @@ -289,7 +290,7 @@ static struct i915_vma *i915_gem_gtt_prepare(struct drm_i915_gem_object *obj, bool write) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct i915_vma *vma; struct i915_gem_ww_ctx ww; int ret; @@ -350,7 +351,7 @@ static void i915_gem_gtt_cleanup(struct drm_i915_gem_object *obj, struct i915_vma *vma) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; i915_gem_object_unpin_pages(obj); if (drm_mm_node_allocated(node)) { @@ -366,7 +367,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pread *args) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; intel_wakeref_t wakeref; struct drm_mm_node node; void __user *user_data; @@ -522,7 +523,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, const struct drm_i915_gem_pwrite *args) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct intel_runtime_pm *rpm = &i915->runtime_pm; intel_wakeref_t wakeref; struct drm_mm_node node; @@ -823,7 +824,7 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) */ list_for_each_entry_safe(obj, on, - &i915->ggtt.userfault_list, userfault_link) + &to_gt(i915)->ggtt->userfault_list, userfault_link) __i915_gem_object_release_mmap_gtt(obj); /* @@ -831,8 +832,8 @@ void i915_gem_runtime_suspend(struct drm_i915_private *i915) * in use by hardware (i.e. they are pinned), we should not be powering * down! All other fences will be reacquired by the user upon waking. */ - for (i = 0; i < i915->ggtt.num_fences; i++) { - struct i915_fence_reg *reg = &i915->ggtt.fence_regs[i]; + for (i = 0; i < to_gt(i915)->ggtt->num_fences; i++) { + struct i915_fence_reg *reg = &to_gt(i915)->ggtt->fence_regs[i]; /* * Ideally we want to assert that the fence register is not @@ -873,7 +874,7 @@ i915_gem_object_ggtt_pin_ww(struct drm_i915_gem_object *obj, u64 size, u64 alignment, u64 flags) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - struct i915_ggtt *ggtt = &i915->ggtt; + struct i915_ggtt *ggtt = to_gt(i915)->ggtt; struct i915_vma *vma; int ret; @@ -1123,7 +1124,7 @@ err_unlock: /* Minimal basic recovery for KMS */ ret = i915_ggtt_enable_hw(dev_priv); - i915_ggtt_resume(&dev_priv->ggtt); + i915_ggtt_resume(to_gt(dev_priv)->ggtt); intel_init_clock_gating(dev_priv); } @@ -1146,7 +1147,7 @@ void i915_gem_driver_unregister(struct drm_i915_private *i915) void i915_gem_driver_remove(struct drm_i915_private *dev_priv) { - intel_wakeref_auto_fini(&dev_priv->ggtt.userfault_wakeref); + intel_wakeref_auto_fini(&to_gt(dev_priv)->ggtt->userfault_wakeref); i915_gem_suspend_late(dev_priv); intel_gt_driver_remove(to_gt(dev_priv)); -- cgit v1.2.3 From 2f6b90da919208621725d1703c0391f63724fa62 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Mon, 10 Jan 2022 18:22:17 +0100 Subject: drm/i915: Use vma resources for async unbinding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement async (non-blocking) unbinding by not syncing the vma before calling unbind on the vma_resource. Add the resulting unbind fence to the object's dma_resv from where it is picked up by the ttm migration code. Ideally these unbind fences should be coalesced with the migration blit fence to avoid stalling the migration blit waiting for unbind, as they can certainly go on in parallel, but since we don't yet have a reasonable data structure to use to coalesce fences and attach the resulting fence to a timeline, we defer that for now. Note that with async unbinding, even while the unbind waits for the preceding bind to complete before unbinding, the vma itself might have been destroyed in the process, clearing the vma pages. Therefore we can only allow async unbinding if we have a refcounted sg-list and keep a refcount on that for the vma resource pages to stay intact until binding occurs. If this condition is not met, a request for an async unbind is diverted to a sync unbind. v2: - Use a separate kmem_cache for vma resources for now to isolate their memory allocation and aid debugging. - Move the check for vm closed to the actual unbinding thread. Regardless of whether the vm is closed, we need the unbind fence to properly wait for capture. - Clear vma_res::vm on unbind and update its documentation. v4: - Take cache coloring into account when searching for vma resources pending unbind. (Matthew Auld) v5: - Fix timeout and error check in i915_vma_resource_bind_dep_await(). - Avoid taking a reference on the object for async binding if async unbind capable. - Fix braces around a single-line if statement. v6: - Fix up the cache coloring adjustment. (Kernel test robot ) - Don't allow async unbinding if the vma_res pages are not the same as the object pages. (Matthew Auld) v7: - s/unsigned long/u64/ in a number of places (Matthew Auld) Signed-off-by: Thomas Hellström Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220110172219.107131-5-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 11 +- drivers/gpu/drm/i915/gt/intel_ggtt.c | 2 +- drivers/gpu/drm/i915/gt/intel_gtt.c | 4 + drivers/gpu/drm/i915/gt/intel_gtt.h | 3 + drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 12 +- drivers/gpu/drm/i915/i915_module.c | 3 + drivers/gpu/drm/i915/i915_vma.c | 205 +++++++++++++--- drivers/gpu/drm/i915/i915_vma.h | 3 +- drivers/gpu/drm/i915/i915_vma_resource.c | 354 ++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_vma_resource.h | 48 ++++ 11 files changed, 579 insertions(+), 67 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index 8653855d808b..1de306c03aaf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -142,7 +142,16 @@ int i915_ttm_move_notify(struct ttm_buffer_object *bo) struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo); int ret; - ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + /* + * Note: The async unbinding here will actually transform the + * blocking wait for unbind into a wait before finally submitting + * evict / migration blit and thus stall the migration timeline + * which may not be good for overall throughput. We should make + * sure we await the unbind fences *after* the migration blit + * instead of *before* as we currently do. + */ + ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE | + I915_GEM_OBJECT_UNBIND_ASYNC); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index bb885d862ec4..91ea2882efda 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -131,7 +131,7 @@ void i915_ggtt_suspend(struct i915_ggtt *ggtt) continue; if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); } } diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index c3846ff33118..058c88682970 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -160,6 +160,9 @@ static void __i915_vm_release(struct work_struct *work) struct i915_address_space *vm = container_of(work, struct i915_address_space, release_work); + /* Synchronize async unbinds. */ + i915_vma_resource_bind_dep_sync_all(vm); + vm->cleanup(vm); i915_address_space_fini(vm); @@ -188,6 +191,7 @@ void i915_address_space_init(struct i915_address_space *vm, int subclass) if (!kref_read(&vm->resv_ref)) kref_init(&vm->resv_ref); + vm->pending_unbind = RB_ROOT_CACHED; INIT_WORK(&vm->release_work, __i915_vm_release); atomic_set(&vm->open, 1); diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 01519e1d1651..ac1c4479e697 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -265,6 +265,9 @@ struct i915_address_space { /* Flags used when creating page-table objects for this vm */ unsigned long lmem_pt_obj_flags; + /* Interval tree for pending unbind vma resources */ + struct rb_root_cached pending_unbind; + struct drm_i915_gem_object * (*alloc_pt_dma)(struct i915_address_space *vm, int sz); struct drm_i915_gem_object * diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 63a579e952b9..b38810f84181 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1878,6 +1878,7 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, #define I915_GEM_OBJECT_UNBIND_BARRIER BIT(1) #define I915_GEM_OBJECT_UNBIND_TEST BIT(2) #define I915_GEM_OBJECT_UNBIND_VM_TRYLOCK BIT(3) +#define I915_GEM_OBJECT_UNBIND_ASYNC BIT(4) void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e3730096abd9..3d6c00f845a3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -156,10 +156,16 @@ try_again: spin_unlock(&obj->vma.lock); if (vma) { + bool vm_trylock = !!(flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK); ret = -EBUSY; - if (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || - !i915_vma_is_active(vma)) { - if (flags & I915_GEM_OBJECT_UNBIND_VM_TRYLOCK) { + if (flags & I915_GEM_OBJECT_UNBIND_ASYNC) { + assert_object_held(vma->obj); + ret = i915_vma_unbind_async(vma, vm_trylock); + } + + if (ret == -EBUSY && (flags & I915_GEM_OBJECT_UNBIND_ACTIVE || + !i915_vma_is_active(vma))) { + if (vm_trylock) { if (mutex_trylock(&vma->vm->mutex)) { ret = __i915_vma_unbind(vma); mutex_unlock(&vma->vm->mutex); diff --git a/drivers/gpu/drm/i915/i915_module.c b/drivers/gpu/drm/i915/i915_module.c index ab2295dd4500..8d18714cf75f 100644 --- a/drivers/gpu/drm/i915/i915_module.c +++ b/drivers/gpu/drm/i915/i915_module.c @@ -17,6 +17,7 @@ #include "i915_scheduler.h" #include "i915_selftest.h" #include "i915_vma.h" +#include "i915_vma_resource.h" static int i915_check_nomodeset(void) { @@ -64,6 +65,8 @@ static const struct { .exit = i915_scheduler_module_exit }, { .init = i915_vma_module_init, .exit = i915_vma_module_exit }, + { .init = i915_vma_resource_module_init, + .exit = i915_vma_resource_module_exit }, { .init = i915_mock_selftests }, { .init = i915_pmu_init, .exit = i915_pmu_exit }, diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 9f00dfa5e022..b86666f653ca 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -285,9 +285,10 @@ struct i915_vma_work { struct dma_fence_work base; struct i915_address_space *vm; struct i915_vm_pt_stash stash; - struct i915_vma *vma; + struct i915_vma_resource *vma_res; struct drm_i915_gem_object *pinned; struct i915_sw_dma_fence_cb cb; + struct i915_refct_sgt *rsgt; enum i915_cache_level cache_level; unsigned int flags; }; @@ -295,10 +296,11 @@ struct i915_vma_work { static void __vma_bind(struct dma_fence_work *work) { struct i915_vma_work *vw = container_of(work, typeof(*vw), base); - struct i915_vma *vma = vw->vma; + struct i915_vma_resource *vma_res = vw->vma_res; + + vma_res->ops->bind_vma(vma_res->vm, &vw->stash, + vma_res, vw->cache_level, vw->flags); - vma->ops->bind_vma(vw->vm, &vw->stash, - vma->resource, vw->cache_level, vw->flags); } static void __vma_release(struct dma_fence_work *work) @@ -310,6 +312,10 @@ static void __vma_release(struct dma_fence_work *work) i915_vm_free_pt_stash(vw->vm, &vw->stash); i915_vm_put(vw->vm); + if (vw->vma_res) + i915_vma_resource_put(vw->vma_res); + if (vw->rsgt) + i915_refct_sgt_put(vw->rsgt); } static const struct dma_fence_work_ops bind_ops = { @@ -379,13 +385,11 @@ i915_vma_resource_init_from_vma(struct i915_vma_resource *vma_res, { struct drm_i915_gem_object *obj = vma->obj; - i915_vma_resource_init(vma_res, vma->pages, &vma->page_sizes, + i915_vma_resource_init(vma_res, vma->vm, vma->pages, &vma->page_sizes, i915_gem_object_is_readonly(obj), i915_gem_object_is_lmem(obj), - vma->private, - vma->node.start, - vma->node.size, - vma->size); + vma->ops, vma->private, vma->node.start, + vma->node.size, vma->size); } /** @@ -409,6 +413,7 @@ int i915_vma_bind(struct i915_vma *vma, { u32 bind_flags; u32 vma_flags; + int ret; lockdep_assert_held(&vma->vm->mutex); GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); @@ -417,12 +422,12 @@ int i915_vma_bind(struct i915_vma *vma, if (GEM_DEBUG_WARN_ON(range_overflows(vma->node.start, vma->node.size, vma->vm->total))) { - kfree(vma_res); + i915_vma_resource_free(vma_res); return -ENODEV; } if (GEM_DEBUG_WARN_ON(!flags)) { - kfree(vma_res); + i915_vma_resource_free(vma_res); return -EINVAL; } @@ -434,12 +439,30 @@ int i915_vma_bind(struct i915_vma *vma, bind_flags &= ~vma_flags; if (bind_flags == 0) { - kfree(vma_res); + i915_vma_resource_free(vma_res); return 0; } GEM_BUG_ON(!atomic_read(&vma->pages_count)); + /* Wait for or await async unbinds touching our range */ + if (work && bind_flags & vma->vm->bind_async_flags) + ret = i915_vma_resource_bind_dep_await(vma->vm, + &work->base.chain, + vma->node.start, + vma->node.size, + true, + GFP_NOWAIT | + __GFP_RETRY_MAYFAIL | + __GFP_NOWARN); + else + ret = i915_vma_resource_bind_dep_sync(vma->vm, vma->node.start, + vma->node.size, true); + if (ret) { + i915_vma_resource_free(vma_res); + return ret; + } + if (vma->resource || !vma_res) { /* Rebinding with an additional I915_VMA_*_BIND */ GEM_WARN_ON(!vma_flags); @@ -452,9 +475,11 @@ int i915_vma_bind(struct i915_vma *vma, if (work && bind_flags & vma->vm->bind_async_flags) { struct dma_fence *prev; - work->vma = vma; + work->vma_res = i915_vma_resource_get(vma->resource); work->cache_level = cache_level; work->flags = bind_flags; + if (vma->obj->mm.rsgt) + work->rsgt = i915_refct_sgt_get(vma->obj->mm.rsgt); /* * Note we only want to chain up to the migration fence on @@ -475,14 +500,24 @@ int i915_vma_bind(struct i915_vma *vma, work->base.dma.error = 0; /* enable the queue_work() */ - work->pinned = i915_gem_object_get(vma->obj); + /* + * If we don't have the refcounted pages list, keep a reference + * on the object to avoid waiting for the async bind to + * complete in the object destruction path. + */ + if (!work->rsgt) + work->pinned = i915_gem_object_get(vma->obj); } else { if (vma->obj) { int ret; ret = i915_gem_object_wait_moving_fence(vma->obj, true); - if (ret) + if (ret) { + i915_vma_resource_free(vma->resource); + vma->resource = NULL; + return ret; + } } vma->ops->bind_vma(vma->vm, NULL, vma->resource, cache_level, bind_flags); @@ -1700,8 +1735,9 @@ int _i915_vma_move_to_active(struct i915_vma *vma, return 0; } -void __i915_vma_evict(struct i915_vma *vma) +struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) { + struct i915_vma_resource *vma_res = vma->resource; struct dma_fence *unbind_fence; GEM_BUG_ON(i915_vma_is_pinned(vma)); @@ -1734,27 +1770,39 @@ void __i915_vma_evict(struct i915_vma *vma) GEM_BUG_ON(vma->fence); GEM_BUG_ON(i915_vma_has_userfault(vma)); - if (likely(atomic_read(&vma->vm->open))) { - trace_i915_vma_unbind(vma); - vma->ops->unbind_vma(vma->vm, vma->resource); - } + /* Object backend must be async capable. */ + GEM_WARN_ON(async && !vma->obj->mm.rsgt); + + /* If vm is not open, unbind is a nop. */ + vma_res->needs_wakeref = i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND) && + atomic_read(&vma->vm->open); + trace_i915_vma_unbind(vma); + + unbind_fence = i915_vma_resource_unbind(vma_res); + vma->resource = NULL; + atomic_and(~(I915_VMA_BIND_MASK | I915_VMA_ERROR | I915_VMA_GGTT_WRITE), &vma->flags); - unbind_fence = i915_vma_resource_unbind(vma->resource); - i915_vma_resource_put(vma->resource); - vma->resource = NULL; + /* Object backend must be async capable. */ + GEM_WARN_ON(async && !vma->obj->mm.rsgt); i915_vma_detach(vma); - vma_unbind_pages(vma); + + if (!async && unbind_fence) { + dma_fence_wait(unbind_fence, false); + dma_fence_put(unbind_fence); + unbind_fence = NULL; + } /* - * This uninterruptible wait under the vm mutex is currently - * only ever blocking while the vma is being captured from. - * With async unbinding, this wait here will be removed. + * Binding itself may not have completed until the unbind fence signals, + * so don't drop the pages until that happens, unless the resource is + * async_capable. */ - dma_fence_wait(unbind_fence, false); - dma_fence_put(unbind_fence); + + vma_unbind_pages(vma); + return unbind_fence; } int __i915_vma_unbind(struct i915_vma *vma) @@ -1781,12 +1829,47 @@ int __i915_vma_unbind(struct i915_vma *vma) return ret; GEM_BUG_ON(i915_vma_is_active(vma)); - __i915_vma_evict(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ return 0; } +static struct dma_fence *__i915_vma_unbind_async(struct i915_vma *vma) +{ + struct dma_fence *fence; + + lockdep_assert_held(&vma->vm->mutex); + + if (!drm_mm_node_allocated(&vma->node)) + return NULL; + + if (i915_vma_is_pinned(vma) || + &vma->obj->mm.rsgt->table != vma->resource->bi.pages) + return ERR_PTR(-EAGAIN); + + /* + * We probably need to replace this with awaiting the fences of the + * object's dma_resv when the vma active goes away. When doing that + * we need to be careful to not add the vma_resource unbind fence + * immediately to the object's dma_resv, because then unbinding + * the next vma from the object, in case there are many, will + * actually await the unbinding of the previous vmas, which is + * undesirable. + */ + if (i915_sw_fence_await_active(&vma->resource->chain, &vma->active, + I915_ACTIVE_AWAIT_EXCL | + I915_ACTIVE_AWAIT_ACTIVE) < 0) { + return ERR_PTR(-EBUSY); + } + + fence = __i915_vma_evict(vma, true); + + drm_mm_remove_node(&vma->node); /* pairs with i915_vma_release() */ + + return fence; +} + int i915_vma_unbind(struct i915_vma *vma) { struct i915_address_space *vm = vma->vm; @@ -1823,6 +1906,68 @@ out_rpm: return err; } +int i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm) +{ + struct drm_i915_gem_object *obj = vma->obj; + struct i915_address_space *vm = vma->vm; + intel_wakeref_t wakeref = 0; + struct dma_fence *fence; + int err; + + /* + * We need the dma-resv lock since we add the + * unbind fence to the dma-resv object. + */ + assert_object_held(obj); + + if (!drm_mm_node_allocated(&vma->node)) + return 0; + + if (i915_vma_is_pinned(vma)) { + vma_print_allocator(vma, "is pinned"); + return -EAGAIN; + } + + if (!obj->mm.rsgt) + return -EBUSY; + + err = dma_resv_reserve_shared(obj->base.resv, 1); + if (err) + return -EBUSY; + + /* + * It would be great if we could grab this wakeref from the + * async unbind work if needed, but we can't because it uses + * kmalloc and it's in the dma-fence signalling critical path. + */ + if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) + wakeref = intel_runtime_pm_get(&vm->i915->runtime_pm); + + if (trylock_vm && !mutex_trylock(&vm->mutex)) { + err = -EBUSY; + goto out_rpm; + } else if (!trylock_vm) { + err = mutex_lock_interruptible_nested(&vm->mutex, !wakeref); + if (err) + goto out_rpm; + } + + fence = __i915_vma_unbind_async(vma); + mutex_unlock(&vm->mutex); + if (IS_ERR_OR_NULL(fence)) { + err = PTR_ERR_OR_ZERO(fence); + goto out_rpm; + } + + dma_resv_add_shared_fence(obj->base.resv, fence); + dma_fence_put(fence); + +out_rpm: + if (wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref); + return err; +} + struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) { i915_gem_object_make_unshrinkable(vma->obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 1df57ec832bd..a560bae04e7e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -213,9 +213,10 @@ bool i915_vma_misplaced(const struct i915_vma *vma, u64 size, u64 alignment, u64 flags); void __i915_vma_set_map_and_fenceable(struct i915_vma *vma); void i915_vma_revoke_mmap(struct i915_vma *vma); -void __i915_vma_evict(struct i915_vma *vma); +struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); +int __must_check i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); void i915_vma_reopen(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/i915_vma_resource.c b/drivers/gpu/drm/i915/i915_vma_resource.c index b50e67035d15..3e55a30b2da7 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.c +++ b/drivers/gpu/drm/i915/i915_vma_resource.c @@ -2,39 +2,44 @@ /* * Copyright © 2021 Intel Corporation */ + +#include #include +#include "i915_sw_fence.h" #include "i915_vma_resource.h" +#include "i915_drv.h" -/* Callbacks for the unbind dma-fence. */ -static const char *get_driver_name(struct dma_fence *fence) -{ - return "vma unbind fence"; -} +#include "gt/intel_gtt.h" -static const char *get_timeline_name(struct dma_fence *fence) -{ - return "unbound"; -} - -static struct dma_fence_ops unbind_fence_ops = { - .get_driver_name = get_driver_name, - .get_timeline_name = get_timeline_name, -}; +static struct kmem_cache *slab_vma_resources; /** - * __i915_vma_resource_init - Initialize a vma resource. - * @vma_res: The vma resource to initialize + * DOC: + * We use a per-vm interval tree to keep track of vma_resources + * scheduled for unbind but not yet unbound. The tree is protected by + * the vm mutex, and nodes are removed just after the unbind fence signals. + * The removal takes the vm mutex from a kernel thread which we need to + * keep in mind so that we don't grab the mutex and try to wait for all + * pending unbinds to complete, because that will temporaryily block many + * of the workqueue threads, and people will get angry. * - * Initializes the private members of a vma resource. + * We should consider using a single ordered fence per VM instead but that + * requires ordering the unbinds and might introduce unnecessary waiting + * for unrelated unbinds. Amount of code will probably be roughly the same + * due to the simplicity of using the interval tree interface. + * + * Another drawback of this interval tree is that the complexity of insertion + * and removal of fences increases as O(ln(pending_unbinds)) instead of + * O(1) for a single fence without interval tree. */ -void __i915_vma_resource_init(struct i915_vma_resource *vma_res) -{ - spin_lock_init(&vma_res->lock); - dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, - &vma_res->lock, 0, 0); - refcount_set(&vma_res->hold_count, 1); -} +#define VMA_RES_START(_node) ((_node)->start) +#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size - 1) +INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb, + u64, __subtree_last, + VMA_RES_START, VMA_RES_LAST, static, vma_res_itree); + +/* Callbacks for the unbind dma-fence. */ /** * i915_vma_resource_alloc - Allocate a vma resource @@ -45,15 +50,73 @@ void __i915_vma_resource_init(struct i915_vma_resource *vma_res) struct i915_vma_resource *i915_vma_resource_alloc(void) { struct i915_vma_resource *vma_res = - kzalloc(sizeof(*vma_res), GFP_KERNEL); + kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL); return vma_res ? vma_res : ERR_PTR(-ENOMEM); } +/** + * i915_vma_resource_free - Free a vma resource + * @vma_res: The vma resource to free. + */ +void i915_vma_resource_free(struct i915_vma_resource *vma_res) +{ + kmem_cache_free(slab_vma_resources, vma_res); +} + +static const char *get_driver_name(struct dma_fence *fence) +{ + return "vma unbind fence"; +} + +static const char *get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static void unbind_fence_free_rcu(struct rcu_head *head) +{ + struct i915_vma_resource *vma_res = + container_of(head, typeof(*vma_res), unbind_fence.rcu); + + i915_vma_resource_free(vma_res); +} + +static void unbind_fence_release(struct dma_fence *fence) +{ + struct i915_vma_resource *vma_res = + container_of(fence, typeof(*vma_res), unbind_fence); + + i915_sw_fence_fini(&vma_res->chain); + + call_rcu(&fence->rcu, unbind_fence_free_rcu); +} + +static struct dma_fence_ops unbind_fence_ops = { + .get_driver_name = get_driver_name, + .get_timeline_name = get_timeline_name, + .release = unbind_fence_release, +}; + static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) { - if (refcount_dec_and_test(&vma_res->hold_count)) - dma_fence_signal(&vma_res->unbind_fence); + struct i915_address_space *vm; + + if (!refcount_dec_and_test(&vma_res->hold_count)) + return; + + dma_fence_signal(&vma_res->unbind_fence); + + vm = vma_res->vm; + if (vma_res->wakeref) + intel_runtime_pm_put(&vm->i915->runtime_pm, vma_res->wakeref); + + vma_res->vm = NULL; + if (!RB_EMPTY_NODE(&vma_res->rb)) { + mutex_lock(&vm->mutex); + vma_res_itree_remove(vma_res, &vm->pending_unbind); + mutex_unlock(&vm->mutex); + } } /** @@ -102,6 +165,49 @@ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, return held; } +static void i915_vma_resource_unbind_work(struct work_struct *work) +{ + struct i915_vma_resource *vma_res = + container_of(work, typeof(*vma_res), work); + struct i915_address_space *vm = vma_res->vm; + bool lockdep_cookie; + + lockdep_cookie = dma_fence_begin_signalling(); + if (likely(atomic_read(&vm->open))) + vma_res->ops->unbind_vma(vm, vma_res); + + dma_fence_end_signalling(lockdep_cookie); + __i915_vma_resource_unhold(vma_res); + i915_vma_resource_put(vma_res); +} + +static int +i915_vma_resource_fence_notify(struct i915_sw_fence *fence, + enum i915_sw_fence_notify state) +{ + struct i915_vma_resource *vma_res = + container_of(fence, typeof(*vma_res), chain); + struct dma_fence *unbind_fence = + &vma_res->unbind_fence; + + switch (state) { + case FENCE_COMPLETE: + dma_fence_get(unbind_fence); + if (vma_res->immediate_unbind) { + i915_vma_resource_unbind_work(&vma_res->work); + } else { + INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); + queue_work(system_unbound_wq, &vma_res->work); + } + break; + case FENCE_FREE: + i915_vma_resource_put(vma_res); + break; + } + + return NOTIFY_DONE; +} + /** * i915_vma_resource_unbind - Unbind a vma resource * @vma_res: The vma resource to unbind. @@ -112,10 +218,196 @@ bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, * Return: A refcounted pointer to a dma-fence that signals when unbinding is * complete. */ -struct dma_fence * -i915_vma_resource_unbind(struct i915_vma_resource *vma_res) +struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res) { - __i915_vma_resource_unhold(vma_res); - dma_fence_get(&vma_res->unbind_fence); + struct i915_address_space *vm = vma_res->vm; + + /* Reference for the sw fence */ + i915_vma_resource_get(vma_res); + + /* Caller must already have a wakeref in this case. */ + if (vma_res->needs_wakeref) + vma_res->wakeref = intel_runtime_pm_get_if_in_use(&vm->i915->runtime_pm); + + if (atomic_read(&vma_res->chain.pending) <= 1) { + RB_CLEAR_NODE(&vma_res->rb); + vma_res->immediate_unbind = 1; + } else { + vma_res_itree_insert(vma_res, &vma_res->vm->pending_unbind); + } + + i915_sw_fence_commit(&vma_res->chain); + return &vma_res->unbind_fence; } + +/** + * __i915_vma_resource_init - Initialize a vma resource. + * @vma_res: The vma resource to initialize + * + * Initializes the private members of a vma resource. + */ +void __i915_vma_resource_init(struct i915_vma_resource *vma_res) +{ + spin_lock_init(&vma_res->lock); + dma_fence_init(&vma_res->unbind_fence, &unbind_fence_ops, + &vma_res->lock, 0, 0); + refcount_set(&vma_res->hold_count, 1); + i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); +} + +static void +i915_vma_resource_color_adjust_range(struct i915_address_space *vm, + u64 *start, + u64 *end) +{ + if (i915_vm_has_cache_coloring(vm)) { + if (*start) + *start -= I915_GTT_PAGE_SIZE; + *end += I915_GTT_PAGE_SIZE; + } +} + +/** + * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a + * certain vm range. + * @vm: The vm to look at. + * @offset: The range start. + * @size: The range size. + * @intr: Whether to wait interrubtible. + * + * The function needs to be called with the vm lock held. + * + * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true + */ +int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, + u64 offset, + u64 size, + bool intr) +{ + struct i915_vma_resource *node; + u64 last = offset + size - 1; + + lockdep_assert_held(&vm->mutex); + might_sleep(); + + i915_vma_resource_color_adjust_range(vm, &offset, &last); + node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); + while (node) { + int ret = dma_fence_wait(&node->unbind_fence, intr); + + if (ret) + return ret; + + node = vma_res_itree_iter_next(node, offset, last); + } + + return 0; +} + +/** + * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, + * releasing the vm lock while waiting. + * @vm: The vm to look at. + * + * The function may not be called with the vm lock held. + * Typically this is called at vm destruction to finish any pending + * unbind operations. The vm mutex is released while waiting to avoid + * stalling kernel workqueues trying to grab the mutex. + */ +void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm) +{ + struct i915_vma_resource *node; + struct dma_fence *fence; + + do { + fence = NULL; + mutex_lock(&vm->mutex); + node = vma_res_itree_iter_first(&vm->pending_unbind, 0, + U64_MAX); + if (node) + fence = dma_fence_get_rcu(&node->unbind_fence); + mutex_unlock(&vm->mutex); + + if (fence) { + /* + * The wait makes sure the node eventually removes + * itself from the tree. + */ + dma_fence_wait(fence, false); + dma_fence_put(fence); + } + } while (node); +} + +/** + * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all + * pending unbinds in a certain range of a vm. + * @vm: The vm to look at. + * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds. + * @offset: The range start. + * @size: The range size. + * @intr: Whether to wait interrubtible. + * @gfp: Allocation mode for memory allocations. + * + * The function makes @sw_fence await all pending unbinds in a certain + * vm range before calling the complete notifier. To be able to await + * each individual unbind, the function needs to allocate memory using + * the @gpf allocation mode. If that fails, the function will instead + * wait for the unbind fence to signal, using @intr to judge whether to + * wait interruptible or not. Note that @gfp should ideally be selected so + * as to avoid any expensive memory allocation stalls and rather fail and + * synchronize itself. For now the vm mutex is required when calling this + * function with means that @gfp can't call into direct reclaim. In reality + * this means that during heavy memory pressure, we will sync in this + * function. + * + * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true + */ +int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, + struct i915_sw_fence *sw_fence, + u64 offset, + u64 size, + bool intr, + gfp_t gfp) +{ + struct i915_vma_resource *node; + u64 last = offset + size - 1; + + lockdep_assert_held(&vm->mutex); + might_alloc(gfp); + might_sleep(); + + i915_vma_resource_color_adjust_range(vm, &offset, &last); + node = vma_res_itree_iter_first(&vm->pending_unbind, offset, last); + while (node) { + int ret; + + ret = i915_sw_fence_await_dma_fence(sw_fence, + &node->unbind_fence, + 0, gfp); + if (ret < 0) { + ret = dma_fence_wait(&node->unbind_fence, intr); + if (ret) + return ret; + } + + node = vma_res_itree_iter_next(node, offset, last); + } + + return 0; +} + +void i915_vma_resource_module_exit(void) +{ + kmem_cache_destroy(slab_vma_resources); +} + +int __init i915_vma_resource_module_init(void) +{ + slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); + if (!slab_vma_resources) + return -ENOMEM; + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_vma_resource.h b/drivers/gpu/drm/i915/i915_vma_resource.h index 8071b40f4f9c..cb3f4e1e8457 100644 --- a/drivers/gpu/drm/i915/i915_vma_resource.h +++ b/drivers/gpu/drm/i915/i915_vma_resource.h @@ -10,6 +10,8 @@ #include #include "i915_gem.h" +#include "i915_sw_fence.h" +#include "intel_runtime_pm.h" struct i915_page_sizes { /** @@ -38,6 +40,13 @@ struct i915_page_sizes { * @hold_count: Number of holders blocking the fence from finishing. * The vma itself is keeping a hold, which is released when unbind * is scheduled. + * @work: Work struct for deferred unbind work. + * @chain: Pointer to struct i915_sw_fence used to await dependencies. + * @rb: Rb node for the vm's pending unbind interval tree. + * @__subtree_last: Interval tree private member. + * @vm: non-refcounted pointer to the vm. This is for internal use only and + * this member is cleared after vm_resource unbind. + * @ops: Pointer to the backend i915_vma_ops. * @private: Bind backend private info. * @start: Offset into the address space of bind range start. * @node_size: Size of the allocated range manager node. @@ -45,6 +54,8 @@ struct i915_page_sizes { * @page_sizes_gtt: Resulting page sizes from the bind operation. * @bound_flags: Flags indicating binding status. * @allocated: Backend private data. TODO: Should move into @private. + * @immediate_unbind: Unbind can be done immediately and don't need to be + * deferred to a work item awaiting unsignaled fences. * * The lifetime of a struct i915_vma_resource is from a binding request to * the actual possible asynchronous unbind has completed. @@ -54,6 +65,12 @@ struct i915_vma_resource { /* See above for description of the lock. */ spinlock_t lock; refcount_t hold_count; + struct work_struct work; + struct i915_sw_fence chain; + struct rb_node rb; + u64 __subtree_last; + struct i915_address_space *vm; + intel_wakeref_t wakeref; /** * struct i915_vma_bindinfo - Information needed for async bind @@ -73,13 +90,17 @@ struct i915_vma_resource { bool lmem:1; } bi; + const struct i915_vma_ops *ops; void *private; u64 start; u64 node_size; u64 vma_size; u32 page_sizes_gtt; + u32 bound_flags; bool allocated:1; + bool immediate_unbind:1; + bool needs_wakeref:1; }; bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, @@ -90,6 +111,8 @@ void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, struct i915_vma_resource *i915_vma_resource_alloc(void); +void i915_vma_resource_free(struct i915_vma_resource *vma_res); + struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res); void __i915_vma_resource_init(struct i915_vma_resource *vma_res); @@ -119,10 +142,12 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res) /** * i915_vma_resource_init - Initialize a vma resource. * @vma_res: The vma resource to initialize + * @vm: Pointer to the vm. * @pages: The pages sg-table. * @page_sizes: Page sizes of the pages. * @readonly: Whether the vma should be bound read-only. * @lmem: Whether the vma points to lmem. + * @ops: The backend ops. * @private: Bind backend private info. * @start: Offset into the address space of bind range start. * @node_size: Size of the allocated range manager node. @@ -134,20 +159,24 @@ static inline void i915_vma_resource_put(struct i915_vma_resource *vma_res) * allocation is not allowed. */ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res, + struct i915_address_space *vm, struct sg_table *pages, const struct i915_page_sizes *page_sizes, bool readonly, bool lmem, + const struct i915_vma_ops *ops, void *private, u64 start, u64 node_size, u64 size) { __i915_vma_resource_init(vma_res); + vma_res->vm = vm; vma_res->bi.pages = pages; vma_res->bi.page_sizes = *page_sizes; vma_res->bi.readonly = readonly; vma_res->bi.lmem = lmem; + vma_res->ops = ops; vma_res->private = private; vma_res->start = start; vma_res->node_size = node_size; @@ -157,6 +186,25 @@ static inline void i915_vma_resource_init(struct i915_vma_resource *vma_res, static inline void i915_vma_resource_fini(struct i915_vma_resource *vma_res) { GEM_BUG_ON(refcount_read(&vma_res->hold_count) != 1); + i915_sw_fence_fini(&vma_res->chain); } +int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, + u64 first, + u64 last, + bool intr); + +int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, + struct i915_sw_fence *sw_fence, + u64 first, + u64 last, + bool intr, + gfp_t gfp); + +void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm); + +void i915_vma_resource_module_exit(void); + +int i915_vma_resource_module_init(void); + #endif -- cgit v1.2.3 From 0f341974cbc2a4efe074dd24c153e439b8430afe Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 14 Jan 2022 14:23:18 +0100 Subject: drm/i915: Add i915_vma_unbind_unlocked, and take obj lock for i915_vma_unbind, v2. We want to remove more members of i915_vma, which requires the locking to be held more often. Start requiring gem object lock for i915_vma_unbind, as it's one of the callers that may unpin pages. Some special care is needed when evicting, because the last reference to the object may be held by the VMA, so after __i915_vma_unbind, vma may be garbage, and we need to cache vma->obj before unlocking. Changes since v1: - Make trylock failing a WARN. (Matt) - Remove double i915_vma_wait_for_bind() (Matt) - Move atomic_set to right before mutex_unlock(), to make it more clear they belong together. (Matt) Signed-off-by: Maarten Lankhorst Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20220114132320.109030-5-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/display/intel_fb_pin.c | 2 +- drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2 +- .../drm/i915/gem/selftests/i915_gem_client_blt.c | 2 +- drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 6 +++ drivers/gpu/drm/i915/gt/intel_ggtt.c | 45 ++++++++++++++++++++-- drivers/gpu/drm/i915/i915_gem.c | 2 + drivers/gpu/drm/i915/i915_vma.c | 27 ++++++++++++- drivers/gpu/drm/i915/i915_vma.h | 1 + drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 22 +++++------ drivers/gpu/drm/i915/selftests/i915_vma.c | 8 ++-- 10 files changed, 94 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/display/intel_fb_pin.c b/drivers/gpu/drm/i915/display/intel_fb_pin.c index 31c15e5fca95..9c555f6d1958 100644 --- a/drivers/gpu/drm/i915/display/intel_fb_pin.c +++ b/drivers/gpu/drm/i915/display/intel_fb_pin.c @@ -47,7 +47,7 @@ intel_pin_fb_obj_dpt(struct drm_framebuffer *fb, goto err; if (i915_vma_misplaced(vma, 0, alignment, 0)) { - ret = i915_vma_unbind(vma); + ret = i915_vma_unbind_unlocked(vma); if (ret) { vma = ERR_PTR(ret); goto err; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 26f997c376a2..f36191ebf964 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -641,7 +641,7 @@ static int igt_mock_ppgtt_misaligned_dma(void *arg) * pages. */ for (offset = 4096; offset < page_size; offset += 4096) { - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) goto out_unpin; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index c08f766e6e15..c8ff8bf0986d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -318,7 +318,7 @@ static int pin_buffer(struct i915_vma *vma, u64 addr) int err; if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) { - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) return err; } diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index f61356b72b1c..ba29767348be 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -166,7 +166,9 @@ static int check_partial_mapping(struct drm_i915_gem_object *obj, kunmap(p); out: + i915_gem_object_lock(obj, NULL); __i915_vma_put(vma); + i915_gem_object_unlock(obj); return err; } @@ -261,7 +263,9 @@ static int check_partial_mappings(struct drm_i915_gem_object *obj, if (err) return err; + i915_gem_object_lock(obj, NULL); __i915_vma_put(vma); + i915_gem_object_unlock(obj); if (igt_timeout(end_time, "%s: timed out after tiling=%d stride=%d\n", @@ -1352,7 +1356,9 @@ static int __igt_mmap_revoke(struct drm_i915_private *i915, * for other objects. Ergo we have to revoke the previous mmap PTE * access as it no longer points to the same object. */ + i915_gem_object_lock(obj, NULL); err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE); + i915_gem_object_unlock(obj); if (err) { pr_err("Failed to unbind object!\n"); goto out_unmap; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index da7f54b6fa38..536b0995b595 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -129,22 +129,51 @@ void i915_ggtt_suspend_vm(struct i915_address_space *vm) drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); +retry: + i915_gem_drain_freed_objects(vm->i915); + mutex_lock(&vm->mutex); /* Skip rewriting PTE on VMA unbind. */ open = atomic_xchg(&vm->open, 0); list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - i915_vma_wait_for_bind(vma); - if (i915_vma_is_pinned(vma)) + if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) continue; + /* unlikely to race when GPU is idle, so no worry about slowpath.. */ + if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) { + /* + * No dead objects should appear here, GPU should be + * completely idle, and userspace suspended + */ + i915_gem_object_get(obj); + + atomic_set(&vm->open, open); + mutex_unlock(&vm->mutex); + + i915_gem_object_lock(obj, NULL); + open = i915_vma_unbind(vma); + i915_gem_object_unlock(obj); + + GEM_WARN_ON(open); + + i915_gem_object_put(obj); + goto retry; + } + if (!i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) { + i915_vma_wait_for_bind(vma); + __i915_vma_evict(vma, false); drm_mm_remove_node(&vma->node); } + + i915_gem_object_unlock(obj); } vm->clear_range(vm, 0, vm->total); @@ -746,11 +775,21 @@ static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) atomic_set(&ggtt->vm.open, 0); flush_workqueue(ggtt->vm.i915->wq); + i915_gem_drain_freed_objects(ggtt->vm.i915); mutex_lock(&ggtt->vm.mutex); - list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { + struct drm_i915_gem_object *obj = vma->obj; + bool trylock; + + trylock = i915_gem_object_trylock(obj, NULL); + WARN_ON(!trylock); + WARN_ON(__i915_vma_unbind(vma)); + if (trylock) + i915_gem_object_unlock(obj); + } if (drm_mm_node_allocated(&ggtt->error_capture)) drm_mm_remove_node(&ggtt->error_capture); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3d6c00f845a3..bb65563296b5 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -119,6 +119,8 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; + assert_object_held(obj); + if (list_empty(&obj->vma.list)) return 0; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 9aa651d919f2..2a24986861e3 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -1622,8 +1622,16 @@ void i915_vma_parked(struct intel_gt *gt) struct drm_i915_gem_object *obj = vma->obj; struct i915_address_space *vm = vma->vm; - INIT_LIST_HEAD(&vma->closed_link); - __i915_vma_put(vma); + if (i915_gem_object_trylock(obj, NULL)) { + INIT_LIST_HEAD(&vma->closed_link); + __i915_vma_put(vma); + i915_gem_object_unlock(obj); + } else { + /* back you go.. */ + spin_lock_irq(>->closed_lock); + list_add(&vma->closed_link, >->closed_vma); + spin_unlock_irq(>->closed_lock); + } i915_gem_object_put(obj); i915_vm_close(vm); @@ -1742,6 +1750,7 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async) struct dma_fence *unbind_fence; GEM_BUG_ON(i915_vma_is_pinned(vma)); + assert_object_held_shared(vma->obj); if (i915_vma_is_map_and_fenceable(vma)) { /* Force a pagefault for domain tracking on next user access */ @@ -1808,6 +1817,7 @@ int __i915_vma_unbind(struct i915_vma *vma) int ret; lockdep_assert_held(&vma->vm->mutex); + assert_object_held_shared(vma->obj); if (!drm_mm_node_allocated(&vma->node)) return 0; @@ -1874,6 +1884,8 @@ int i915_vma_unbind(struct i915_vma *vma) intel_wakeref_t wakeref = 0; int err; + assert_object_held_shared(vma->obj); + /* Optimistic wait before taking the mutex */ err = i915_vma_sync(vma); if (err) @@ -1966,6 +1978,17 @@ out_rpm: return err; } +int i915_vma_unbind_unlocked(struct i915_vma *vma) +{ + int err; + + i915_gem_object_lock(vma->obj, NULL); + err = i915_vma_unbind(vma); + i915_gem_object_unlock(vma->obj); + + return err; +} + struct i915_vma *i915_vma_make_unshrinkable(struct i915_vma *vma) { i915_gem_object_make_unshrinkable(vma->obj); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index a560bae04e7e..011af044ad4f 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -217,6 +217,7 @@ struct dma_fence *__i915_vma_evict(struct i915_vma *vma, bool async); int __i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); int __must_check i915_vma_unbind_async(struct i915_vma *vma, bool trylock_vm); +int __must_check i915_vma_unbind_unlocked(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); void i915_vma_reopen(struct i915_vma *vma); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index b91ec3d2d66a..fba1c8be1649 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -386,7 +386,7 @@ static void close_object_list(struct list_head *objects, vma = i915_vma_instance(obj, vm, NULL); if (!IS_ERR(vma)) - ignored = i915_vma_unbind(vma); + ignored = i915_vma_unbind_unlocked(vma); list_del(&obj->st_link); i915_gem_object_put(obj); @@ -497,7 +497,7 @@ static int fill_hole(struct i915_address_space *vm, goto err; } - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n", __func__, p->name, vma->node.start, vma->node.size, @@ -570,7 +570,7 @@ static int fill_hole(struct i915_address_space *vm, goto err; } - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n", __func__, p->name, vma->node.start, vma->node.size, @@ -656,7 +656,7 @@ static int walk_hole(struct i915_address_space *vm, goto err_put; } - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("%s unbind failed at %llx + %llx with err=%d\n", __func__, addr, vma->size, err); @@ -733,13 +733,13 @@ static int pot_hole(struct i915_address_space *vm, pr_err("%s incorrect at %llx + %llx\n", __func__, addr, vma->size); i915_vma_unpin(vma); - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); err = -EINVAL; goto err_obj; } i915_vma_unpin(vma); - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); GEM_BUG_ON(err); } @@ -833,13 +833,13 @@ static int drunk_hole(struct i915_address_space *vm, pr_err("%s incorrect at %llx + %llx\n", __func__, addr, BIT_ULL(size)); i915_vma_unpin(vma); - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); err = -EINVAL; goto err_obj; } i915_vma_unpin(vma); - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); GEM_BUG_ON(err); if (igt_timeout(end_time, @@ -907,7 +907,7 @@ static int __shrink_hole(struct i915_address_space *vm, pr_err("%s incorrect at %llx + %llx\n", __func__, addr, size); i915_vma_unpin(vma); - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); err = -EINVAL; break; } @@ -1481,7 +1481,7 @@ static int igt_gtt_reserve(void *arg) goto out; } - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("i915_vma_unbind failed with err=%d!\n", err); goto out; @@ -1677,7 +1677,7 @@ static int igt_gtt_insert(void *arg) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); offset = vma->node.start; - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("i915_vma_unbind failed with err=%d!\n", err); goto out; diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index de37cfa4c65f..0280605a2673 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -340,7 +340,7 @@ static int igt_vma_pin1(void *arg) if (!err) { i915_vma_unpin(vma); - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("Failed to unbind single page from GGTT, err=%d\n", err); goto out; @@ -691,7 +691,7 @@ static int igt_vma_rotate_remap(void *arg) } i915_vma_unpin(vma); - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("Unbinding returned %i\n", err); goto out_object; @@ -852,7 +852,7 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); nvma++; - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("Unbinding returned %i\n", err); goto out_object; @@ -891,7 +891,7 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); - err = i915_vma_unbind(vma); + err = i915_vma_unbind_unlocked(vma); if (err) { pr_err("Unbinding returned %i\n", err); goto out_object; -- cgit v1.2.3 From f3392b85130fdc9e17bf6abe362d5e9e4bc9b8b1 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 10 Feb 2022 17:45:40 +0200 Subject: drm/i915: remove leftover i915_gem_pm.h declarations from i915_drv.h Remove the duplicates. Cc: Tvrtko Ursulin Cc: Daniel Vetter Signed-off-by: Jani Nikula Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/48f5ace6393533372da5d13df3de0203c8db11b3.1644507885.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 --- drivers/gpu/drm/i915/i915_gem.c | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6f6e8f1e1978..a71fc9855e4e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1619,9 +1619,6 @@ void i915_gem_driver_register(struct drm_i915_private *i915); void i915_gem_driver_unregister(struct drm_i915_private *i915); void i915_gem_driver_remove(struct drm_i915_private *dev_priv); void i915_gem_driver_release(struct drm_i915_private *dev_priv); -void i915_gem_suspend(struct drm_i915_private *dev_priv); -void i915_gem_suspend_late(struct drm_i915_private *dev_priv); -void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5ef959a9f594..7b66d90318ce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -44,6 +44,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_ioctls.h" #include "gem/i915_gem_mman.h" +#include "gem/i915_gem_pm.h" #include "gem/i915_gem_region.h" #include "gem/i915_gem_userptr.h" #include "gt/intel_engine_user.h" -- cgit v1.2.3 From 5472b3f2d9ae65d809d0443dd8f5cc7e1b20b1af Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 10 Feb 2022 17:45:47 +0200 Subject: drm/i915: split out i915_file_private.h from i915_drv.h Limit the scope of struct drm_i915_file_private to the files that actually need it. Cc: Tvrtko Ursulin Cc: Daniel Vetter Signed-off-by: Jani Nikula Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/e375859dc1729a1b988036e4103e5b1bd48caa00.1644507885.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_object.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 1 + drivers/gpu/drm/i915/gem/selftests/mock_context.c | 1 + drivers/gpu/drm/i915/gt/intel_reset.c | 1 + drivers/gpu/drm/i915/i915_driver.c | 1 + drivers/gpu/drm/i915/i915_drv.h | 93 ------------------- drivers/gpu/drm/i915/i915_file_private.h | 108 ++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_perf.c | 1 + 11 files changed, 117 insertions(+), 94 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_file_private.h (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index d3fe0375c778..321f793d5af9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -79,6 +79,7 @@ #include "pxp/intel_pxp.h" +#include "i915_file_private.h" #include "i915_gem_context.h" #include "i915_trace.h" #include "i915_user_extensions.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 355a7b68fdac..8e19b3baa684 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -25,6 +25,7 @@ #include "i915_cmd_parser.h" #include "i915_drv.h" +#include "i915_file_private.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_evict.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 6b719368467e..160431b41fa7 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -28,6 +28,7 @@ #include "pxp/intel_pxp.h" #include "i915_drv.h" +#include "i915_file_private.h" #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_dmabuf.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c index 75501db71041..af85d0c28168 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c @@ -9,6 +9,7 @@ #include #include "i915_drv.h" +#include "i915_file_private.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c index c0a8ef368044..6d6082b5f31f 100644 --- a/drivers/gpu/drm/i915/gem/selftests/mock_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c @@ -4,6 +4,7 @@ * Copyright © 2016 Intel Corporation */ +#include "i915_file_private.h" #include "mock_context.h" #include "selftests/mock_drm.h" #include "selftests/mock_gtt.h" diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 59beb69ff6f2..13e3a7b236b3 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -14,6 +14,7 @@ #include "gt/intel_gt_regs.h" #include "i915_drv.h" +#include "i915_file_private.h" #include "i915_gpu_error.h" #include "i915_irq.h" #include "intel_breadcrumbs.h" diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 364c88763543..a892a1b546a8 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -73,6 +73,7 @@ #include "pxp/intel_pxp_pm.h" +#include "i915_file_private.h" #include "i915_debugfs.h" #include "i915_driver.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3e78855b329a..195ece479271 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -48,7 +48,6 @@ #include #include #include -#include #include #include @@ -180,98 +179,6 @@ struct i915_hotplug { I915_GEM_DOMAIN_INSTRUCTION | \ I915_GEM_DOMAIN_VERTEX) -struct drm_i915_file_private { - struct drm_i915_private *dev_priv; - - union { - struct drm_file *file; - struct rcu_head rcu; - }; - - /** @proto_context_lock: Guards all struct i915_gem_proto_context - * operations - * - * This not only guards @proto_context_xa, but is always held - * whenever we manipulate any struct i915_gem_proto_context, - * including finalizing it on first actual use of the GEM context. - * - * See i915_gem_proto_context. - */ - struct mutex proto_context_lock; - - /** @proto_context_xa: xarray of struct i915_gem_proto_context - * - * Historically, the context uAPI allowed for two methods of - * setting context parameters: SET_CONTEXT_PARAM and - * CONTEXT_CREATE_EXT_SETPARAM. The former is allowed to be called - * at any time while the later happens as part of - * GEM_CONTEXT_CREATE. Everything settable via one was settable - * via the other. While some params are fairly simple and setting - * them on a live context is harmless such as the context priority, - * others are far trickier such as the VM or the set of engines. - * In order to swap out the VM, for instance, we have to delay - * until all current in-flight work is complete, swap in the new - * VM, and then continue. This leads to a plethora of potential - * race conditions we'd really rather avoid. - * - * We have since disallowed setting these more complex parameters - * on active contexts. This works by delaying the creation of the - * actual context until after the client is done configuring it - * with SET_CONTEXT_PARAM. From the perspective of the client, it - * has the same u32 context ID the whole time. From the - * perspective of i915, however, it's a struct i915_gem_proto_context - * right up until the point where we attempt to do something which - * the proto-context can't handle. Then the struct i915_gem_context - * gets created. - * - * This is accomplished via a little xarray dance. When - * GEM_CONTEXT_CREATE is called, we create a struct - * i915_gem_proto_context, reserve a slot in @context_xa but leave - * it NULL, and place the proto-context in the corresponding slot - * in @proto_context_xa. Then, in i915_gem_context_lookup(), we - * first check @context_xa. If it's there, we return the struct - * i915_gem_context and we're done. If it's not, we look in - * @proto_context_xa and, if we find it there, we create the actual - * context and kill the proto-context. - * - * In order for this dance to work properly, everything which ever - * touches a struct i915_gem_proto_context is guarded by - * @proto_context_lock, including context creation. Yes, this - * means context creation now takes a giant global lock but it - * can't really be helped and that should never be on any driver's - * fast-path anyway. - */ - struct xarray proto_context_xa; - - /** @context_xa: xarray of fully created i915_gem_context - * - * Write access to this xarray is guarded by @proto_context_lock. - * Otherwise, writers may race with finalize_create_context_locked(). - * - * See @proto_context_xa. - */ - struct xarray context_xa; - struct xarray vm_xa; - - unsigned int bsd_engine; - -/* - * Every context ban increments per client ban score. Also - * hangs in short succession increments ban score. If ban threshold - * is reached, client is considered banned and submitting more work - * will fail. This is a stop gap measure to limit the badly behaving - * clients access to gpu. Note that unbannable contexts never increment - * the client ban score. - */ -#define I915_CLIENT_SCORE_HANG_FAST 1 -#define I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ) -#define I915_CLIENT_SCORE_CONTEXT_BAN 3 -#define I915_CLIENT_SCORE_BANNED 9 - /** ban_score: Accumulated score of all ctx bans and fast hangs. */ - atomic_t ban_score; - unsigned long hang_timestamp; -}; - struct sdvo_device_mapping { u8 initialized; u8 dvo_port; diff --git a/drivers/gpu/drm/i915/i915_file_private.h b/drivers/gpu/drm/i915/i915_file_private.h new file mode 100644 index 000000000000..fb16cc431b2a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_file_private.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_FILE_PRIVATE_H__ +#define __I915_FILE_PRIVATE_H__ + +#include +#include +#include + +struct drm_i915_private; +struct drm_file; + +struct drm_i915_file_private { + struct drm_i915_private *dev_priv; + + union { + struct drm_file *file; + struct rcu_head rcu; + }; + + /** @proto_context_lock: Guards all struct i915_gem_proto_context + * operations + * + * This not only guards @proto_context_xa, but is always held + * whenever we manipulate any struct i915_gem_proto_context, + * including finalizing it on first actual use of the GEM context. + * + * See i915_gem_proto_context. + */ + struct mutex proto_context_lock; + + /** @proto_context_xa: xarray of struct i915_gem_proto_context + * + * Historically, the context uAPI allowed for two methods of + * setting context parameters: SET_CONTEXT_PARAM and + * CONTEXT_CREATE_EXT_SETPARAM. The former is allowed to be called + * at any time while the later happens as part of + * GEM_CONTEXT_CREATE. Everything settable via one was settable + * via the other. While some params are fairly simple and setting + * them on a live context is harmless such as the context priority, + * others are far trickier such as the VM or the set of engines. + * In order to swap out the VM, for instance, we have to delay + * until all current in-flight work is complete, swap in the new + * VM, and then continue. This leads to a plethora of potential + * race conditions we'd really rather avoid. + * + * We have since disallowed setting these more complex parameters + * on active contexts. This works by delaying the creation of the + * actual context until after the client is done configuring it + * with SET_CONTEXT_PARAM. From the perspective of the client, it + * has the same u32 context ID the whole time. From the + * perspective of i915, however, it's a struct i915_gem_proto_context + * right up until the point where we attempt to do something which + * the proto-context can't handle. Then the struct i915_gem_context + * gets created. + * + * This is accomplished via a little xarray dance. When + * GEM_CONTEXT_CREATE is called, we create a struct + * i915_gem_proto_context, reserve a slot in @context_xa but leave + * it NULL, and place the proto-context in the corresponding slot + * in @proto_context_xa. Then, in i915_gem_context_lookup(), we + * first check @context_xa. If it's there, we return the struct + * i915_gem_context and we're done. If it's not, we look in + * @proto_context_xa and, if we find it there, we create the actual + * context and kill the proto-context. + * + * In order for this dance to work properly, everything which ever + * touches a struct i915_gem_proto_context is guarded by + * @proto_context_lock, including context creation. Yes, this + * means context creation now takes a giant global lock but it + * can't really be helped and that should never be on any driver's + * fast-path anyway. + */ + struct xarray proto_context_xa; + + /** @context_xa: xarray of fully created i915_gem_context + * + * Write access to this xarray is guarded by @proto_context_lock. + * Otherwise, writers may race with finalize_create_context_locked(). + * + * See @proto_context_xa. + */ + struct xarray context_xa; + struct xarray vm_xa; + + unsigned int bsd_engine; + +/* + * Every context ban increments per client ban score. Also + * hangs in short succession increments ban score. If ban threshold + * is reached, client is considered banned and submitting more work + * will fail. This is a stop gap measure to limit the badly behaving + * clients access to gpu. Note that unbannable contexts never increment + * the client ban score. + */ +#define I915_CLIENT_SCORE_HANG_FAST 1 +#define I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ) +#define I915_CLIENT_SCORE_CONTEXT_BAN 3 +#define I915_CLIENT_SCORE_BANNED 9 + /** ban_score: Accumulated score of all ctx bans and fast hangs. */ + atomic_t ban_score; + unsigned long hang_timestamp; +}; + +#endif /* __I915_FILE_PRIVATE_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7b66d90318ce..6eb568925080 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -53,9 +53,9 @@ #include "gt/intel_workarounds.h" #include "i915_drv.h" +#include "i915_file_private.h" #include "i915_trace.h" #include "i915_vgpu.h" - #include "intel_pm.h" static int diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 09f7cf7bbc45..0f76686f6404 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -209,6 +209,7 @@ #include "gt/intel_ring.h" #include "i915_drv.h" +#include "i915_file_private.h" #include "i915_perf.h" #include "i915_perf_oa_regs.h" -- cgit v1.2.3 From 5f2ec9095ce8439614ac289819a44e02f52a5415 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Thu, 10 Feb 2022 17:45:48 +0200 Subject: drm/i915: don't include drm_cache.h in i915_drv.h Include it only in files that use it. Cc: Tvrtko Ursulin Cc: Daniel Vetter Signed-off-by: Jani Nikula Acked-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/14edab4a193ea3f73f387a88e3836c8555401871.1644507885.git.jani.nikula@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_context.c | 1 + drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_object.c | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_pages.c | 2 ++ drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 2 ++ drivers/gpu/drm/i915/gt/intel_ring_submission.c | 2 ++ drivers/gpu/drm/i915/gt/intel_timeline.c | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 2 ++ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 2 ++ drivers/gpu/drm/i915/i915_cmd_parser.c | 2 ++ drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem.c | 4 +++- drivers/gpu/drm/i915/i915_gpu_error.c | 1 + 14 files changed, 25 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_gem.c') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c index 8a248003dfae..ce91b23385cf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c @@ -4,6 +4,8 @@ * Copyright © 2016 Intel Corporation */ +#include + #include "display/intel_frontbuffer.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 321f793d5af9..2958e2be4292 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -67,6 +67,7 @@ #include #include +#include #include #include "gt/gen6_ppgtt.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 1478c02a82cb..a3b60e1ede90 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -9,6 +9,8 @@ #include #include +#include + #include "gt/intel_gt.h" #include "gt/intel_gt_requests.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c index 160431b41fa7..9a478c19c477 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c @@ -24,6 +24,8 @@ #include +#include + #include "display/intel_frontbuffer.h" #include "pxp/intel_pxp.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index a50f884973bc..022582a1ca17 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -4,6 +4,8 @@ * Copyright © 2014-2016 Intel Corporation */ +#include + #include "i915_drv.h" #include "i915_gem_object.h" #include "i915_scatterlist.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c index cc9fe258fba7..3414122245f0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c @@ -7,6 +7,8 @@ #include #include +#include + #include "gem/i915_gem_region.h" #include "i915_drv.h" #include "i915_gemfs.h" diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index b128ccd2fb98..6d7ec3bf1f32 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -3,6 +3,8 @@ * Copyright © 2008-2021 Intel Corporation */ +#include + #include "gem/i915_gem_internal.h" #include "gen2_engine_cs.h" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 2962be6d4d00..b9640212d659 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -3,6 +3,8 @@ * Copyright © 2016-2018 Intel Corporation */ +#include + #include "gem/i915_gem_internal.h" #include "i915_active.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c index ddbea939b1dc..733465658e0f 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c @@ -3,6 +3,8 @@ * Copyright © 2021 Intel Corporation */ +#include + #include "i915_drv.h" #include "i915_reg.h" #include "intel_guc_slpc.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index dd588ecf048a..549d5919dc70 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -5,6 +5,8 @@ #include #include + +#include #include #include "gem/i915_gem_lmem.h" diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index aea4c30645ff..5f6e41636655 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -25,6 +25,8 @@ * */ +#include + #include "gt/intel_engine.h" #include "gt/intel_engine_regs.h" #include "gt/intel_gpu_commands.h" diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 195ece479271..27f95f5f395d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -51,7 +51,6 @@ #include #include -#include #include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6eb568925080..db897fb11f9b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -25,7 +25,6 @@ * */ -#include #include #include #include @@ -37,6 +36,9 @@ #include #include +#include +#include + #include "display/intel_display.h" #include "display/intel_frontbuffer.h" diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index c859e875a10c..53d64c217de1 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -34,6 +34,7 @@ #include #include +#include #include #include "display/intel_dmc.h" -- cgit v1.2.3