From 22b7a426bbe1ebe1520f92da4cd1617d1e1b5fc4 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Thu, 20 Jun 2019 15:20:51 +0100
Subject: drm/i915/execlists: Preempt-to-busy

When using a global seqno, we required a precise stop-the-world event
to handle preemption and unwind the global seqno counter. To accomplish
this, we would preempt to a special out-of-band context and wait for
the machine to report that it was idle. Given an idle machine, we could
very precisely see which requests had completed and which we needed to
feed back into the run queue.

However, now that we have scrapped the global seqno, we no longer need
to precisely unwind the global counter and only track requests by their
per-context seqno. This allows us to loosely unwind inflight requests
while scheduling a preemption, with the enormous caveat that the
requests we put back on the run queue are still _inflight_ (until the
preemption request is complete). This makes request tracking much
messier, as at any point we can see a completed request that we believe
is not currently scheduled for execution. We also have to be careful
not to rewind RING_TAIL past RING_HEAD on preempting to the running
context, and for this we use a semaphore to prevent completion of the
request before continuing.

To accomplish this feat, we change how we track requests scheduled to
the HW. Instead of appending our requests onto a single list as we
submit, we track each submission to ELSP as its own block. Then upon
receiving the CS preemption event, we promote the pending block to the
inflight block (discarding what was previously being tracked). As
normal CS completion events arrive, we then remove stale entries from
the inflight tracker.

v2: Be a tinge paranoid and ensure we flush the write into the HWS page
for the GPU semaphore to pick up in a timely fashion.

Signed-off-by: Chris Wilson
Reviewed-by: Mika Kuoppala
Link: https://patchwork.freedesktop.org/patch/msgid/20190620142052.19311-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_request.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/gpu/drm/i915/i915_request.h')

diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index edbbdfec24ab..bebc1e9b4a5e 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -28,6 +28,7 @@
 #include
 #include
+#include "gt/intel_context_types.h"
 #include "gt/intel_engine_types.h"
 #include "i915_gem.h"
--
cgit v1.2.3


From f0c02c1b91888ccac539388eacb0659bf263a557 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Fri, 21 Jun 2019 08:08:10 +0100
Subject: drm/i915: Rename i915_timeline to intel_timeline and move under gt

Move all timeline code under gt/ and rename to the intel_ prefix.
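
The change is mechanical for callers: only the include path and the
i915_/intel_ prefix change, while the function signatures stay the same.
A minimal before/after sketch of a caller follows; the
example_make_timeline() wrapper and its error handling are illustrative
only and not lifted from an actual call site in this patch.

  /* Before this patch: timeline API at the i915_ top level. */
  #include "i915_timeline.h"

  static int example_make_timeline(struct drm_i915_private *i915)
  {
          struct i915_timeline *tl;

          tl = i915_timeline_create(&i915->gt, NULL);
          if (IS_ERR(tl))
                  return PTR_ERR(tl);

          /* ... emit requests against tl ... */

          i915_timeline_put(tl);
          return 0;
  }

  /* After this patch: same caller, now using gt/intel_timeline.h. */
  #include "gt/intel_timeline.h"

  static int example_make_timeline(struct drm_i915_private *i915)
  {
          struct intel_timeline *tl;

          tl = intel_timeline_create(&i915->gt, NULL);
          if (IS_ERR(tl))
                  return PTR_ERR(tl);

          /* ... emit requests against tl ... */

          intel_timeline_put(tl);
          return 0;
  }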
Signed-off-by: Tvrtko Ursulin Suggested-by: Chris Wilson Reviewed-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190621070811.7006-32-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/Makefile | 2 +- drivers/gpu/drm/i915/Makefile.header-test | 1 - drivers/gpu/drm/i915/gem/i915_gem_context.c | 10 +- drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 4 +- drivers/gpu/drm/i915/gem/i915_gem_pm.c | 2 +- drivers/gpu/drm/i915/gt/intel_engine.h | 4 +- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 14 +- drivers/gpu/drm/i915/gt/intel_engine_types.h | 4 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 10 +- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 18 +- drivers/gpu/drm/i915/gt/intel_timeline.c | 591 ++++++++++++++ drivers/gpu/drm/i915/gt/intel_timeline.h | 93 +++ drivers/gpu/drm/i915/gt/intel_timeline_types.h | 67 ++ drivers/gpu/drm/i915/gt/mock_engine.c | 10 +- drivers/gpu/drm/i915/gt/selftest_timeline.c | 845 +++++++++++++++++++++ drivers/gpu/drm/i915/gt/selftests/mock_timeline.c | 29 + drivers/gpu/drm/i915/gt/selftests/mock_timeline.h | 15 + drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 8 +- drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +- drivers/gpu/drm/i915/i915_request.c | 14 +- drivers/gpu/drm/i915/i915_request.h | 8 +- drivers/gpu/drm/i915/i915_timeline.c | 591 -------------- drivers/gpu/drm/i915/i915_timeline.h | 93 --- drivers/gpu/drm/i915/i915_timeline_types.h | 67 -- .../gpu/drm/i915/selftests/i915_live_selftests.h | 2 +- .../gpu/drm/i915/selftests/i915_mock_selftests.h | 2 +- drivers/gpu/drm/i915/selftests/i915_timeline.c | 845 --------------------- drivers/gpu/drm/i915/selftests/mock_gem_device.c | 6 +- drivers/gpu/drm/i915/selftests/mock_timeline.c | 29 - drivers/gpu/drm/i915/selftests/mock_timeline.h | 15 - 32 files changed, 1702 insertions(+), 1703 deletions(-) create mode 100644 drivers/gpu/drm/i915/gt/intel_timeline.c create mode 100644 drivers/gpu/drm/i915/gt/intel_timeline.h create mode 100644 drivers/gpu/drm/i915/gt/intel_timeline_types.h create mode 100644 drivers/gpu/drm/i915/gt/selftest_timeline.c create mode 100644 drivers/gpu/drm/i915/gt/selftests/mock_timeline.c create mode 100644 drivers/gpu/drm/i915/gt/selftests/mock_timeline.h delete mode 100644 drivers/gpu/drm/i915/i915_timeline.c delete mode 100644 drivers/gpu/drm/i915/i915_timeline.h delete mode 100644 drivers/gpu/drm/i915/i915_timeline_types.h delete mode 100644 drivers/gpu/drm/i915/selftests/i915_timeline.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_timeline.c delete mode 100644 drivers/gpu/drm/i915/selftests/mock_timeline.h (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 19f8b6745772..84ac0fd1b8d0 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -82,6 +82,7 @@ gt-y += \ gt/intel_ringbuffer.o \ gt/intel_mocs.o \ gt/intel_sseu.o \ + gt/intel_timeline.o \ gt/intel_workarounds.o gt-$(CONFIG_DRM_I915_SELFTEST) += \ gt/mock_engine.o @@ -127,7 +128,6 @@ i915-y += \ i915_query.o \ i915_request.o \ i915_scheduler.o \ - i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_wopcm.o diff --git a/drivers/gpu/drm/i915/Makefile.header-test b/drivers/gpu/drm/i915/Makefile.header-test index cb74242f9c3b..b1c3e642f621 100644 --- a/drivers/gpu/drm/i915/Makefile.header-test +++ b/drivers/gpu/drm/i915/Makefile.header-test @@ -12,7 +12,6 @@ header_test := \ i915_priolist_types.h \ i915_reg.h \ 
i915_scheduler_types.h \ - i915_timeline_types.h \ i915_utils.h \ intel_csr.h \ intel_drv.h \ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index fb691535fbf2..628673d1d7f8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -316,7 +316,7 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) mutex_destroy(&ctx->engines_mutex); if (ctx->timeline) - i915_timeline_put(ctx->timeline); + intel_timeline_put(ctx->timeline); kfree(ctx->name); put_pid(ctx->pid); @@ -528,9 +528,9 @@ i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags) } if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; - timeline = i915_timeline_create(&dev_priv->gt, NULL); + timeline = intel_timeline_create(&dev_priv->gt, NULL); if (IS_ERR(timeline)) { context_close(ctx); return ERR_CAST(timeline); @@ -2015,8 +2015,8 @@ static int clone_timeline(struct i915_gem_context *dst, GEM_BUG_ON(src->timeline == dst->timeline); if (dst->timeline) - i915_timeline_put(dst->timeline); - dst->timeline = i915_timeline_get(src->timeline); + intel_timeline_put(dst->timeline); + dst->timeline = intel_timeline_get(src->timeline); } return 0; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index cc513410eeef..0ee61482ef94 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -26,7 +26,7 @@ struct pid; struct drm_i915_private; struct drm_i915_file_private; struct i915_address_space; -struct i915_timeline; +struct intel_timeline; struct intel_ring; struct i915_gem_engines { @@ -77,7 +77,7 @@ struct i915_gem_context { struct i915_gem_engines __rcu *engines; struct mutex engines_mutex; /* guards writes to engines */ - struct i915_timeline *timeline; + struct intel_timeline *timeline; /** * @vm: unique address space (GTT) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index 05011d4a3b88..8f721cf0ab99 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -38,7 +38,7 @@ static void i915_gem_park(struct drm_i915_private *i915) i915_gem_batch_pool_fini(&engine->batch_pool); } - i915_timelines_park(i915); + intel_timelines_park(i915); i915_vma_parked(i915); i915_globals_park(); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 9bb6ff76680e..557b08b13feb 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -14,7 +14,7 @@ #include "i915_reg.h" #include "i915_request.h" #include "i915_selftest.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #include "intel_engine_types.h" #include "intel_gpu_commands.h" #include "intel_workarounds.h" @@ -200,7 +200,7 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, + struct intel_timeline *timeline, int size); int intel_ring_pin(struct intel_ring *ring); void intel_ring_reset(struct intel_ring *ring, u32 tail); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 89edf97d8ad7..e30212e219ec 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -724,7 +724,7 @@ void intel_engines_set_scheduler_caps(struct drm_i915_private *i915) struct measure_breadcrumb { struct i915_request rq; - struct i915_timeline timeline; + struct intel_timeline timeline; struct intel_ring ring; u32 cs[1024]; }; @@ -740,9 +740,9 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) if (!frame) return -ENOMEM; - if (i915_timeline_init(&frame->timeline, - engine->gt, - engine->status_page.vma)) + if (intel_timeline_init(&frame->timeline, + engine->gt, + engine->status_page.vma)) goto out_frame; INIT_LIST_HEAD(&frame->ring.request_list); @@ -757,17 +757,17 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) frame->rq.ring = &frame->ring; frame->rq.timeline = &frame->timeline; - dw = i915_timeline_pin(&frame->timeline); + dw = intel_timeline_pin(&frame->timeline); if (dw < 0) goto out_timeline; dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs; GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */ - i915_timeline_unpin(&frame->timeline); + intel_timeline_unpin(&frame->timeline); out_timeline: - i915_timeline_fini(&frame->timeline); + intel_timeline_fini(&frame->timeline); out_frame: kfree(frame); return dw; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index fb65e96fa36b..7e056114344e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -20,7 +20,7 @@ #include "i915_pmu.h" #include "i915_priolist_types.h" #include "i915_selftest.h" -#include "i915_timeline_types.h" +#include "gt/intel_timeline_types.h" #include "intel_sseu.h" #include "intel_wakeref.h" #include "intel_workarounds_types.h" @@ -68,7 +68,7 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; - struct i915_timeline *timeline; + struct intel_timeline *timeline; struct list_head request_list; struct list_head active_link; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 415fdf2eb997..3abcec3e4e0e 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3005,13 +3005,13 @@ err_unpin_ctx: return ret; } -static struct i915_timeline * +static struct intel_timeline * get_timeline(struct i915_gem_context *ctx, struct intel_gt *gt) { if (ctx->timeline) - return i915_timeline_get(ctx->timeline); + return intel_timeline_get(ctx->timeline); else - return i915_timeline_create(gt, NULL); + return intel_timeline_create(gt, NULL); } static int execlists_context_deferred_alloc(struct intel_context *ce, @@ -3021,7 +3021,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, struct i915_vma *vma; u32 context_size; struct intel_ring *ring; - struct i915_timeline *timeline; + struct intel_timeline *timeline; int ret; if (ce->state) @@ -3054,7 +3054,7 @@ static int execlists_context_deferred_alloc(struct intel_context *ce, ring = intel_engine_create_ring(engine, timeline, ce->gem_context->ring_size); - i915_timeline_put(timeline); + intel_timeline_put(timeline); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 5297b3acb56d..3c925af64793 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -851,7 +851,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) static bool __i915_gem_unset_wedged(struct drm_i915_private *i915) { struct 
i915_gpu_error *error = &i915->gpu_error; - struct i915_timeline *tl; + struct intel_timeline *tl; if (!test_bit(I915_WEDGED, &error->flags)) return true; diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 9a748be0ce0c..aa483bba04bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1156,7 +1156,7 @@ int intel_ring_pin(struct intel_ring *ring) if (atomic_fetch_inc(&ring->pin_count)) return 0; - ret = i915_timeline_pin(ring->timeline); + ret = intel_timeline_pin(ring->timeline); if (ret) goto err_unpin; @@ -1194,7 +1194,7 @@ int intel_ring_pin(struct intel_ring *ring) err_ring: i915_vma_unpin(vma); err_timeline: - i915_timeline_unpin(ring->timeline); + intel_timeline_unpin(ring->timeline); err_unpin: atomic_dec(&ring->pin_count); return ret; @@ -1231,7 +1231,7 @@ void intel_ring_unpin(struct intel_ring *ring) ring->vma->obj->pin_global--; i915_vma_unpin(ring->vma); - i915_timeline_unpin(ring->timeline); + intel_timeline_unpin(ring->timeline); } static struct i915_vma *create_ring_vma(struct i915_ggtt *ggtt, int size) @@ -1267,7 +1267,7 @@ err: struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, - struct i915_timeline *timeline, + struct intel_timeline *timeline, int size) { struct drm_i915_private *i915 = engine->i915; @@ -1283,7 +1283,7 @@ intel_engine_create_ring(struct intel_engine_cs *engine, kref_init(&ring->ref); INIT_LIST_HEAD(&ring->request_list); - ring->timeline = i915_timeline_get(timeline); + ring->timeline = intel_timeline_get(timeline); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1313,7 +1313,7 @@ void intel_ring_free(struct kref *ref) i915_vma_close(ring->vma); i915_vma_put(ring->vma); - i915_timeline_put(ring->timeline); + intel_timeline_put(ring->timeline); kfree(ring); } @@ -2269,11 +2269,11 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) int intel_ring_submission_init(struct intel_engine_cs *engine) { - struct i915_timeline *timeline; + struct intel_timeline *timeline; struct intel_ring *ring; int err; - timeline = i915_timeline_create(engine->gt, engine->status_page.vma); + timeline = intel_timeline_create(engine->gt, engine->status_page.vma); if (IS_ERR(timeline)) { err = PTR_ERR(timeline); goto err; @@ -2281,7 +2281,7 @@ int intel_ring_submission_init(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->has_initial_breadcrumb); ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); - i915_timeline_put(timeline); + intel_timeline_put(timeline); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c new file mode 100644 index 000000000000..1a3f04458730 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -0,0 +1,591 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016-2018 Intel Corporation + */ + +#include "gt/intel_gt_types.h" + +#include "i915_drv.h" + +#include "i915_active.h" +#include "i915_syncmap.h" +#include "gt/intel_timeline.h" + +#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) +#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) + +struct intel_timeline_hwsp { + struct intel_gt *gt; + struct i915_gt_timelines *gt_timelines; + struct list_head free_link; + struct i915_vma *vma; + u64 free_bitmap; +}; + +struct intel_timeline_cacheline { + struct i915_active active; + struct intel_timeline_hwsp 
*hwsp; + void *vaddr; +#define CACHELINE_BITS 6 +#define CACHELINE_FREE CACHELINE_BITS +}; + +static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) +{ + struct drm_i915_private *i915 = gt->i915; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) + i915_gem_object_put(obj); + + return vma; +} + +static struct i915_vma * +hwsp_alloc(struct intel_timeline *timeline, unsigned int *cacheline) +{ + struct i915_gt_timelines *gt = &timeline->gt->timelines; + struct intel_timeline_hwsp *hwsp; + + BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); + + spin_lock_irq(>->hwsp_lock); + + /* hwsp_free_list only contains HWSP that have available cachelines */ + hwsp = list_first_entry_or_null(>->hwsp_free_list, + typeof(*hwsp), free_link); + if (!hwsp) { + struct i915_vma *vma; + + spin_unlock_irq(>->hwsp_lock); + + hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); + if (!hwsp) + return ERR_PTR(-ENOMEM); + + vma = __hwsp_alloc(timeline->gt); + if (IS_ERR(vma)) { + kfree(hwsp); + return vma; + } + + vma->private = hwsp; + hwsp->gt = timeline->gt; + hwsp->vma = vma; + hwsp->free_bitmap = ~0ull; + hwsp->gt_timelines = gt; + + spin_lock_irq(>->hwsp_lock); + list_add(&hwsp->free_link, >->hwsp_free_list); + } + + GEM_BUG_ON(!hwsp->free_bitmap); + *cacheline = __ffs64(hwsp->free_bitmap); + hwsp->free_bitmap &= ~BIT_ULL(*cacheline); + if (!hwsp->free_bitmap) + list_del(&hwsp->free_link); + + spin_unlock_irq(>->hwsp_lock); + + GEM_BUG_ON(hwsp->vma->private != hwsp); + return hwsp->vma; +} + +static void __idle_hwsp_free(struct intel_timeline_hwsp *hwsp, int cacheline) +{ + struct i915_gt_timelines *gt = hwsp->gt_timelines; + unsigned long flags; + + spin_lock_irqsave(>->hwsp_lock, flags); + + /* As a cacheline becomes available, publish the HWSP on the freelist */ + if (!hwsp->free_bitmap) + list_add_tail(&hwsp->free_link, >->hwsp_free_list); + + GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap)); + hwsp->free_bitmap |= BIT_ULL(cacheline); + + /* And if no one is left using it, give the page back to the system */ + if (hwsp->free_bitmap == ~0ull) { + i915_vma_put(hwsp->vma); + list_del(&hwsp->free_link); + kfree(hwsp); + } + + spin_unlock_irqrestore(>->hwsp_lock, flags); +} + +static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) +{ + GEM_BUG_ON(!i915_active_is_idle(&cl->active)); + + i915_gem_object_unpin_map(cl->hwsp->vma->obj); + i915_vma_put(cl->hwsp->vma); + __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); + + i915_active_fini(&cl->active); + kfree(cl); +} + +static void __cacheline_retire(struct i915_active *active) +{ + struct intel_timeline_cacheline *cl = + container_of(active, typeof(*cl), active); + + i915_vma_unpin(cl->hwsp->vma); + if (ptr_test_bit(cl->vaddr, CACHELINE_FREE)) + __idle_cacheline_free(cl); +} + +static struct intel_timeline_cacheline * +cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) +{ + struct intel_timeline_cacheline *cl; + void *vaddr; + + GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); + + cl = kmalloc(sizeof(*cl), GFP_KERNEL); + if (!cl) + return ERR_PTR(-ENOMEM); + + vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + kfree(cl); + return ERR_CAST(vaddr); + } + + i915_vma_get(hwsp->vma); + cl->hwsp = hwsp; + cl->vaddr = 
page_pack_bits(vaddr, cacheline); + + i915_active_init(hwsp->gt->i915, &cl->active, __cacheline_retire); + + return cl; +} + +static void cacheline_acquire(struct intel_timeline_cacheline *cl) +{ + if (cl && i915_active_acquire(&cl->active)) + __i915_vma_pin(cl->hwsp->vma); +} + +static void cacheline_release(struct intel_timeline_cacheline *cl) +{ + if (cl) + i915_active_release(&cl->active); +} + +static void cacheline_free(struct intel_timeline_cacheline *cl) +{ + GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); + cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); + + if (i915_active_is_idle(&cl->active)) + __idle_cacheline_free(cl); +} + +int intel_timeline_init(struct intel_timeline *timeline, + struct intel_gt *gt, + struct i915_vma *hwsp) +{ + void *vaddr; + + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + * + * Called during early_init before we know how many engines there are. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + + timeline->gt = gt; + timeline->pin_count = 0; + timeline->has_initial_breadcrumb = !hwsp; + timeline->hwsp_cacheline = NULL; + + if (!hwsp) { + struct intel_timeline_cacheline *cl; + unsigned int cacheline; + + hwsp = hwsp_alloc(timeline, &cacheline); + if (IS_ERR(hwsp)) + return PTR_ERR(hwsp); + + cl = cacheline_alloc(hwsp->private, cacheline); + if (IS_ERR(cl)) { + __idle_hwsp_free(hwsp->private, cacheline); + return PTR_ERR(cl); + } + + timeline->hwsp_cacheline = cl; + timeline->hwsp_offset = cacheline * CACHELINE_BYTES; + + vaddr = page_mask_bits(cl->vaddr); + } else { + timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; + + vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + } + + timeline->hwsp_seqno = + memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); + + timeline->hwsp_ggtt = i915_vma_get(hwsp); + GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); + + timeline->fence_context = dma_fence_context_alloc(1); + + mutex_init(&timeline->mutex); + + INIT_ACTIVE_REQUEST(&timeline->last_request); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); + + return 0; +} + +static void timelines_init(struct intel_gt *gt) +{ + struct i915_gt_timelines *timelines = >->timelines; + + mutex_init(&timelines->mutex); + INIT_LIST_HEAD(&timelines->active_list); + + spin_lock_init(&timelines->hwsp_lock); + INIT_LIST_HEAD(&timelines->hwsp_free_list); + + /* via i915_gem_wait_for_idle() */ + i915_gem_shrinker_taints_mutex(gt->i915, &timelines->mutex); +} + +void intel_timelines_init(struct drm_i915_private *i915) +{ + timelines_init(&i915->gt); +} + +static void timeline_add_to_active(struct intel_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->gt->timelines; + + mutex_lock(>->mutex); + list_add(&tl->link, >->active_list); + mutex_unlock(>->mutex); +} + +static void timeline_remove_from_active(struct intel_timeline *tl) +{ + struct i915_gt_timelines *gt = &tl->gt->timelines; + + mutex_lock(>->mutex); + list_del(&tl->link); + mutex_unlock(>->mutex); +} + +static void timelines_park(struct intel_gt *gt) +{ + struct i915_gt_timelines *timelines = >->timelines; + struct intel_timeline *timeline; + + mutex_lock(&timelines->mutex); + list_for_each_entry(timeline, &timelines->active_list, link) { + /* + * All known fences are completed so we can scrap + * the current 
sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. + */ + i915_syncmap_free(&timeline->sync); + } + mutex_unlock(&timelines->mutex); +} + +/** + * intel_timelines_park - called when the driver idles + * @i915: the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void intel_timelines_park(struct drm_i915_private *i915) +{ + timelines_park(&i915->gt); +} + +void intel_timeline_fini(struct intel_timeline *timeline) +{ + GEM_BUG_ON(timeline->pin_count); + GEM_BUG_ON(!list_empty(&timeline->requests)); + + i915_syncmap_free(&timeline->sync); + + if (timeline->hwsp_cacheline) + cacheline_free(timeline->hwsp_cacheline); + else + i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); + + i915_vma_put(timeline->hwsp_ggtt); +} + +struct intel_timeline * +intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) +{ + struct intel_timeline *timeline; + int err; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + err = intel_timeline_init(timeline, gt, global_hwsp); + if (err) { + kfree(timeline); + return ERR_PTR(err); + } + + kref_init(&timeline->kref); + + return timeline; +} + +int intel_timeline_pin(struct intel_timeline *tl) +{ + int err; + + if (tl->pin_count++) + return 0; + GEM_BUG_ON(!tl->pin_count); + + err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) + goto unpin; + + tl->hwsp_offset = + i915_ggtt_offset(tl->hwsp_ggtt) + + offset_in_page(tl->hwsp_offset); + + cacheline_acquire(tl->hwsp_cacheline); + timeline_add_to_active(tl); + + return 0; + +unpin: + tl->pin_count = 0; + return err; +} + +static u32 timeline_advance(struct intel_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); + + return tl->seqno += 1 + tl->has_initial_breadcrumb; +} + +static void timeline_rollback(struct intel_timeline *tl) +{ + tl->seqno -= 1 + tl->has_initial_breadcrumb; +} + +static noinline int +__intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno) +{ + struct intel_timeline_cacheline *cl; + unsigned int cacheline; + struct i915_vma *vma; + void *vaddr; + int err; + + /* + * If there is an outstanding GPU reference to this cacheline, + * such as it being sampled by a HW semaphore on another timeline, + * we cannot wraparound our seqno value (the HW semaphore does + * a strict greater-than-or-equals compare, not i915_seqno_passed). + * So if the cacheline is still busy, we must detach ourselves + * from it and leave it inflight alongside its users. + * + * However, if nobody is watching and we can guarantee that nobody + * will, we could simply reuse the same cacheline. + * + * if (i915_active_request_is_signaled(&tl->last_request) && + * i915_active_is_signaled(&tl->hwsp_cacheline->active)) + * return 0; + * + * That seems unlikely for a busy timeline that needed to wrap in + * the first place, so just replace the cacheline. 
+ */ + + vma = hwsp_alloc(tl, &cacheline); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_rollback; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (err) { + __idle_hwsp_free(vma->private, cacheline); + goto err_rollback; + } + + cl = cacheline_alloc(vma->private, cacheline); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + __idle_hwsp_free(vma->private, cacheline); + goto err_unpin; + } + GEM_BUG_ON(cl->hwsp->vma != vma); + + /* + * Attach the old cacheline to the current request, so that we only + * free it after the current request is retired, which ensures that + * all writes into the cacheline from previous requests are complete. + */ + err = i915_active_ref(&tl->hwsp_cacheline->active, + tl->fence_context, rq); + if (err) + goto err_cacheline; + + cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */ + cacheline_free(tl->hwsp_cacheline); + + i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */ + i915_vma_put(tl->hwsp_ggtt); + + tl->hwsp_ggtt = i915_vma_get(vma); + + vaddr = page_mask_bits(cl->vaddr); + tl->hwsp_offset = cacheline * CACHELINE_BYTES; + tl->hwsp_seqno = + memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); + + tl->hwsp_offset += i915_ggtt_offset(vma); + + cacheline_acquire(cl); + tl->hwsp_cacheline = cl; + + *seqno = timeline_advance(tl); + GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); + return 0; + +err_cacheline: + cacheline_free(cl); +err_unpin: + i915_vma_unpin(vma); +err_rollback: + timeline_rollback(tl); + return err; +} + +int intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno) +{ + *seqno = timeline_advance(tl); + + /* Replace the HWSP on wraparound for HW semaphores */ + if (unlikely(!*seqno && tl->hwsp_cacheline)) + return __intel_timeline_get_seqno(tl, rq, seqno); + + return 0; +} + +static int cacheline_ref(struct intel_timeline_cacheline *cl, + struct i915_request *rq) +{ + return i915_active_ref(&cl->active, rq->fence.context, rq); +} + +int intel_timeline_read_hwsp(struct i915_request *from, + struct i915_request *to, + u32 *hwsp) +{ + struct intel_timeline_cacheline *cl = from->hwsp_cacheline; + struct intel_timeline *tl = from->timeline; + int err; + + GEM_BUG_ON(to->timeline == tl); + + mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); + err = i915_request_completed(from); + if (!err) + err = cacheline_ref(cl, to); + if (!err) { + if (likely(cl == tl->hwsp_cacheline)) { + *hwsp = tl->hwsp_offset; + } else { /* across a seqno wrap, recover the original offset */ + *hwsp = i915_ggtt_offset(cl->hwsp->vma) + + ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * + CACHELINE_BYTES; + } + } + mutex_unlock(&tl->mutex); + + return err; +} + +void intel_timeline_unpin(struct intel_timeline *tl) +{ + GEM_BUG_ON(!tl->pin_count); + if (--tl->pin_count) + return; + + timeline_remove_from_active(tl); + cacheline_release(tl->hwsp_cacheline); + + /* + * Since this timeline is idle, all bariers upon which we were waiting + * must also be complete and so we can discard the last used barriers + * without loss of information. 
+ */ + i915_syncmap_free(&tl->sync); + + __i915_vma_unpin(tl->hwsp_ggtt); +} + +void __intel_timeline_free(struct kref *kref) +{ + struct intel_timeline *timeline = + container_of(kref, typeof(*timeline), kref); + + intel_timeline_fini(timeline); + kfree(timeline); +} + +static void timelines_fini(struct intel_gt *gt) +{ + struct i915_gt_timelines *timelines = >->timelines; + + GEM_BUG_ON(!list_empty(&timelines->active_list)); + GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); + + mutex_destroy(&timelines->mutex); +} + +void intel_timelines_fini(struct drm_i915_private *i915) +{ + timelines_fini(&i915->gt); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "gt/selftests/mock_timeline.c" +#include "gt/selftest_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h new file mode 100644 index 000000000000..e08cebf64833 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -0,0 +1,93 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H + +#include + +#include "i915_active.h" +#include "i915_syncmap.h" +#include "gt/intel_timeline_types.h" + +int intel_timeline_init(struct intel_timeline *tl, + struct intel_gt *gt, + struct i915_vma *hwsp); +void intel_timeline_fini(struct intel_timeline *tl); + +struct intel_timeline * +intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); + +static inline struct intel_timeline * +intel_timeline_get(struct intel_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __intel_timeline_free(struct kref *kref); +static inline void intel_timeline_put(struct intel_timeline *timeline) +{ + kref_put(&timeline->kref, __intel_timeline_free); +} + +static inline int __intel_timeline_sync_set(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_set(&tl->sync, context, seqno); +} + +static inline int intel_timeline_sync_set(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno); +} + +static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, + u64 context, u32 seqno) +{ + return i915_syncmap_is_later(&tl->sync, context, seqno); +} + +static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); +} + +int intel_timeline_pin(struct intel_timeline *tl); +int intel_timeline_get_seqno(struct intel_timeline *tl, + struct i915_request *rq, + u32 *seqno); +void intel_timeline_unpin(struct intel_timeline *tl); + +int intel_timeline_read_hwsp(struct i915_request *from, + struct i915_request *until, + u32 *hwsp_offset); + +void intel_timelines_init(struct drm_i915_private *i915); +void intel_timelines_park(struct drm_i915_private *i915); +void intel_timelines_fini(struct drm_i915_private *i915); + +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h new file mode 100644 index 000000000000..9a71aea7a338 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016 Intel Corporation + */ + +#ifndef __I915_TIMELINE_TYPES_H__ +#define __I915_TIMELINE_TYPES_H__ + +#include +#include +#include +#include + +#include "i915_active_types.h" + +struct drm_i915_private; +struct i915_vma; +struct intel_timeline_cacheline; +struct i915_syncmap; + +struct intel_timeline { + u64 fence_context; + u32 seqno; + + struct mutex mutex; /* protects the flow of requests */ + + unsigned int pin_count; + const u32 *hwsp_seqno; + struct i915_vma *hwsp_ggtt; + u32 hwsp_offset; + + struct intel_timeline_cacheline *hwsp_cacheline; + + bool has_initial_breadcrumb; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head requests; + + /* Contains an RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_active_request_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_active_request last_request; + + /** + * We track the most recent seqno that we wait on in every context so + * that we only have to emit a new await and dependency on a more + * recent sync point. 
As the contexts may be executed out-of-order, we + * have to track each individually and can not rely on an absolute + * global_seqno. When we know that all tracked fences are completed + * (i.e. when the driver is idle), we know that the syncmap is + * redundant and we can discard it without loss of generality. + */ + struct i915_syncmap *sync; + + struct list_head link; + struct intel_gt *gt; + + struct kref kref; +}; + +#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 423027aa71cd..bf0974b12f3d 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -33,15 +33,15 @@ struct mock_ring { struct intel_ring base; - struct i915_timeline timeline; + struct intel_timeline timeline; }; -static void mock_timeline_pin(struct i915_timeline *tl) +static void mock_timeline_pin(struct intel_timeline *tl) { tl->pin_count++; } -static void mock_timeline_unpin(struct i915_timeline *tl) +static void mock_timeline_unpin(struct intel_timeline *tl) { GEM_BUG_ON(!tl->pin_count); tl->pin_count--; @@ -56,7 +56,7 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - if (i915_timeline_init(&ring->timeline, engine->gt, NULL)) { + if (intel_timeline_init(&ring->timeline, engine->gt, NULL)) { kfree(ring); return NULL; } @@ -78,7 +78,7 @@ static void mock_ring_free(struct intel_ring *base) { struct mock_ring *ring = container_of(base, typeof(*ring), base); - i915_timeline_fini(&ring->timeline); + intel_timeline_fini(&ring->timeline); kfree(ring); } diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c new file mode 100644 index 000000000000..193cc564ade2 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -0,0 +1,845 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include + +#include "gem/i915_gem_pm.h" + +#include "../selftests/i915_random.h" +#include "../i915_selftest.h" + +#include "../selftests/igt_flush_test.h" +#include "../selftests/mock_gem_device.h" +#include "selftests/mock_timeline.h" + +static struct page *hwsp_page(struct intel_timeline *tl) +{ + struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; + + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + return sg_page(obj->mm.pages->sgl); +} + +static unsigned long hwsp_cacheline(struct intel_timeline *tl) +{ + unsigned long address = (unsigned long)page_address(hwsp_page(tl)); + + return (address + tl->hwsp_offset) / CACHELINE_BYTES; +} + +#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) + +struct mock_hwsp_freelist { + struct drm_i915_private *i915; + struct radix_tree_root cachelines; + struct intel_timeline **history; + unsigned long count, max; + struct rnd_state prng; +}; + +enum { + SHUFFLE = BIT(0), +}; + +static void __mock_hwsp_record(struct mock_hwsp_freelist *state, + unsigned int idx, + struct intel_timeline *tl) +{ + tl = xchg(&state->history[idx], tl); + if (tl) { + radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); + intel_timeline_put(tl); + } +} + +static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, + unsigned int count, + unsigned int flags) +{ + struct intel_timeline *tl; + unsigned int idx; + + while (count--) { + unsigned long cacheline; + int err; + + tl = intel_timeline_create(&state->i915->gt, NULL); + if (IS_ERR(tl)) + return PTR_ERR(tl); + + cacheline = hwsp_cacheline(tl); + err = 
radix_tree_insert(&state->cachelines, cacheline, tl); + if (err) { + if (err == -EEXIST) { + pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", + cacheline); + } + intel_timeline_put(tl); + return err; + } + + idx = state->count++ % state->max; + __mock_hwsp_record(state, idx, tl); + } + + if (flags & SHUFFLE) + i915_prandom_shuffle(state->history, + sizeof(*state->history), + min(state->count, state->max), + &state->prng); + + count = i915_prandom_u32_max_state(min(state->count, state->max), + &state->prng); + while (count--) { + idx = --state->count % state->max; + __mock_hwsp_record(state, idx, NULL); + } + + return 0; +} + +static int mock_hwsp_freelist(void *arg) +{ + struct mock_hwsp_freelist state; + const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "linear", 0 }, + { "shuffled", SHUFFLE }, + { }, + }, *p; + unsigned int na; + int err = 0; + + INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); + state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); + + state.i915 = mock_gem_device(); + if (!state.i915) + return -ENOMEM; + + /* + * Create a bunch of timelines and check that their HWSP do not overlap. + * Free some, and try again. + */ + + state.max = PAGE_SIZE / sizeof(*state.history); + state.count = 0; + state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); + if (!state.history) { + err = -ENOMEM; + goto err_put; + } + + mutex_lock(&state.i915->drm.struct_mutex); + for (p = phases; p->name; p++) { + pr_debug("%s(%s)\n", __func__, p->name); + for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { + err = __mock_hwsp_timeline(&state, na, p->flags); + if (err) + goto out; + } + } + +out: + for (na = 0; na < state.max; na++) + __mock_hwsp_record(&state, na, NULL); + mutex_unlock(&state.i915->drm.struct_mutex); + kfree(state.history); +err_put: + drm_dev_put(&state.i915->drm); + return err; +} + +struct __igt_sync { + const char *name; + u32 seqno; + bool expected; + bool set; +}; + +static int __igt_sync(struct intel_timeline *tl, + u64 ctx, + const struct __igt_sync *p, + const char *name) +{ + int ret; + + if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", + name, p->name, ctx, p->seqno, yesno(p->expected)); + return -EINVAL; + } + + if (p->set) { + ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + if (ret) + return ret; + } + + return 0; +} + +static int igt_sync(void *arg) +{ + const struct __igt_sync pass[] = { + { "unset", 0, false, false }, + { "new", 0, false, true }, + { "0a", 0, true, true }, + { "1a", 1, false, true }, + { "1b", 1, true, true }, + { "0b", 0, true, false }, + { "2a", 2, false, true }, + { "4", 4, false, true }, + { "INT_MAX", INT_MAX, false, true }, + { "INT_MAX-1", INT_MAX-1, true, false }, + { "INT_MAX+1", (u32)INT_MAX+1, false, true }, + { "INT_MAX", INT_MAX, true, false }, + { "UINT_MAX", UINT_MAX, false, true }, + { "wrap", 0, false, true }, + { "unwrap", UINT_MAX, true, false }, + {}, + }, *p; + struct intel_timeline tl; + int order, offset; + int ret = -ENODEV; + + mock_timeline_init(&tl, 0); + for (p = pass; p->name; p++) { + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { + u64 ctx = BIT_ULL(order) + offset; + + ret = __igt_sync(&tl, ctx, p, "1"); + if (ret) + goto out; + } + } + } + mock_timeline_fini(&tl); + + mock_timeline_init(&tl, 0); + for (order = 1; order < 64; order++) { + for (offset = -1; offset <= (order > 1); offset++) { 
+ u64 ctx = BIT_ULL(order) + offset; + + for (p = pass; p->name; p++) { + ret = __igt_sync(&tl, ctx, p, "2"); + if (ret) + goto out; + } + } + } + +out: + mock_timeline_fini(&tl); + return ret; +} + +static unsigned int random_engine(struct rnd_state *rnd) +{ + return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); +} + +static int bench_sync(void *arg) +{ + struct rnd_state prng; + struct intel_timeline tl; + unsigned long end_time, count; + u64 prng32_1M; + ktime_t kt; + int order, last_order; + + mock_timeline_init(&tl, 0); + + /* Lookups from cache are very fast and so the random number generation + * and the loop itself becomes a significant factor in the per-iteration + * timings. We try to compensate the results by measuring the overhead + * of the prng and subtract it from the reported results. + */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 x; + + /* Make sure the compiler doesn't optimise away the prng call */ + WRITE_ONCE(x, prandom_u32_state(&prng)); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_debug("%s: %lu random evaluations, %lluns/prng\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); + + /* Benchmark (only) setting random context ids */ + prandom_seed_state(&prng, i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u64 id = i915_prandom_u64_state(&prng); + + __intel_timeline_sync_set(&tl, id, 0); + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + prandom_seed_state(&prng, i915_selftest.random_seed); + end_time = count; + kt = ktime_get(); + while (end_time--) { + u64 id = i915_prandom_u64_state(&prng); + + if (!__intel_timeline_sync_is_later(&tl, id, 0)) { + mock_timeline_fini(&tl); + pr_err("Lookup of %llu failed\n", id); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu random lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_fini(&tl); + cond_resched(); + + mock_timeline_init(&tl, 0); + + /* Benchmark setting the first N (in order) contexts */ + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + __intel_timeline_sync_set(&tl, count++, 0); + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order insertions, %lluns/insert\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + /* Benchmark looking up the exact same context ids as we just set */ + end_time = count; + kt = ktime_get(); + while (end_time--) { + if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) { + pr_err("Lookup of %lu failed\n", end_time); + mock_timeline_fini(&tl); + return -EINVAL; + } + } + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu in-order lookups, %lluns/lookup\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + + mock_timeline_fini(&tl); + cond_resched(); + + mock_timeline_init(&tl, 0); + + /* Benchmark searching for a random context id and maybe changing it */ + prandom_seed_state(&prng, 
i915_selftest.random_seed); + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + u32 id = random_engine(&prng); + u32 seqno = prandom_u32_state(&prng); + + if (!__intel_timeline_sync_is_later(&tl, id, seqno)) + __intel_timeline_sync_set(&tl, id, seqno); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); + pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", + __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_fini(&tl); + cond_resched(); + + /* Benchmark searching for a known context id and changing the seqno */ + for (last_order = 1, order = 1; order < 32; + ({ int tmp = last_order; last_order = order; order += tmp; })) { + unsigned int mask = BIT(order) - 1; + + mock_timeline_init(&tl, 0); + + count = 0; + kt = ktime_get(); + end_time = jiffies + HZ/10; + do { + /* Without assuming too many details of the underlying + * implementation, try to identify its phase-changes + * (if any)! + */ + u64 id = (u64)(count & mask) << order; + + __intel_timeline_sync_is_later(&tl, id, 0); + __intel_timeline_sync_set(&tl, id, 0); + + count++; + } while (!time_after(jiffies, end_time)); + kt = ktime_sub(ktime_get(), kt); + pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", + __func__, count, order, + (long long)div64_ul(ktime_to_ns(kt), count)); + mock_timeline_fini(&tl); + cond_resched(); + } + + return 0; +} + +int intel_timeline_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(mock_hwsp_freelist), + SUBTEST(igt_sync), + SUBTEST(bench_sync), + }; + + return i915_subtests(tests, NULL); +} + +static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (INTEL_GEN(rq->i915) >= 8) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = addr; + *cs++ = 0; + *cs++ = value; + } else if (INTEL_GEN(rq->i915) >= 4) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = 0; + *cs++ = addr; + *cs++ = value; + } else { + *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; + *cs++ = addr; + *cs++ = value; + *cs++ = MI_NOOP; + } + + intel_ring_advance(rq, cs); + + return 0; +} + +static struct i915_request * +tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value) +{ + struct i915_request *rq; + int err; + + lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ + + err = intel_timeline_pin(tl); + if (err) { + rq = ERR_PTR(err); + goto out; + } + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) + goto out_unpin; + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_unpin: + intel_timeline_unpin(tl); +out: + if (IS_ERR(rq)) + pr_err("Failed to write to timeline!\n"); + return rq; +} + +static struct intel_timeline * +checked_intel_timeline_create(struct drm_i915_private *i915) +{ + struct intel_timeline *tl; + + tl = intel_timeline_create(&i915->gt, NULL); + if (IS_ERR(tl)) + return tl; + + if (*tl->hwsp_seqno != tl->seqno) { + pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", + *tl->hwsp_seqno, tl->seqno); + intel_timeline_put(tl); + return ERR_PTR(-EINVAL); + } + + return tl; +} + +static int live_hwsp_engine(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct intel_timeline **timelines; + struct intel_engine_cs *engine; + enum 
intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for_each_engine(engine, i915, id) { + if (!intel_engine_can_store_dword(engine)) + continue; + + for (n = 0; n < NUM_TIMELINES; n++) { + struct intel_timeline *tl; + struct i915_request *rq; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct intel_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + intel_timeline_put(tl); + } + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_alternate(void *arg) +{ +#define NUM_TIMELINES 4096 + struct drm_i915_private *i915 = arg; + struct intel_timeline **timelines; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count, n; + int err = 0; + + /* + * Create a bunch of timelines and check we can write + * independently to each of their breadcrumb slots with adjacent + * engines. + */ + + timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, + sizeof(*timelines), + GFP_KERNEL); + if (!timelines) + return -ENOMEM; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for (n = 0; n < NUM_TIMELINES; n++) { + for_each_engine(engine, i915, id) { + struct intel_timeline *tl; + struct i915_request *rq; + + if (!intel_engine_can_store_dword(engine)) + continue; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + timelines[count++] = tl; + } + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + for (n = 0; n < count; n++) { + struct intel_timeline *tl = timelines[n]; + + if (!err && *tl->hwsp_seqno != n) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + n, *tl->hwsp_seqno); + err = -EINVAL; + } + intel_timeline_put(tl); + } + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + kvfree(timelines); + + return err; +#undef NUM_TIMELINES +} + +static int live_hwsp_wrap(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct intel_timeline *tl; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = 0; + + /* + * Across a seqno wrap, we need to keep the old cacheline alive for + * foreign GPU references. 
+ */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + tl = intel_timeline_create(&i915->gt, NULL); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out_rpm; + } + if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) + goto out_free; + + err = intel_timeline_pin(tl); + if (err) + goto out_free; + + for_each_engine(engine, i915, id) { + const u32 *hwsp_seqno[2]; + struct i915_request *rq; + u32 seqno[2]; + + if (!intel_engine_can_store_dword(engine)) + continue; + + rq = i915_request_create(engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + tl->seqno = -4u; + + err = intel_timeline_get_seqno(tl, rq, &seqno[0]); + if (err) { + i915_request_add(rq); + goto out; + } + pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", + seqno[0], tl->hwsp_offset); + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); + if (err) { + i915_request_add(rq); + goto out; + } + hwsp_seqno[0] = tl->hwsp_seqno; + + err = intel_timeline_get_seqno(tl, rq, &seqno[1]); + if (err) { + i915_request_add(rq); + goto out; + } + pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", + seqno[1], tl->hwsp_offset); + + err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); + if (err) { + i915_request_add(rq); + goto out; + } + hwsp_seqno[1] = tl->hwsp_seqno; + + /* With wrap should come a new hwsp */ + GEM_BUG_ON(seqno[1] >= seqno[0]); + GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); + + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + err = -EIO; + goto out; + } + + if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { + pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", + *hwsp_seqno[0], *hwsp_seqno[1], + seqno[0], seqno[1]); + err = -EINVAL; + goto out; + } + + i915_retire_requests(i915); /* recycle HWSP */ + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + + intel_timeline_unpin(tl); +out_free: + intel_timeline_put(tl); +out_rpm: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +static int live_hwsp_recycle(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + intel_wakeref_t wakeref; + unsigned long count; + int err = 0; + + /* + * Check seqno writes into one timeline at a time. We expect to + * recycle the breadcrumb slot between iterations and neither + * want to confuse ourselves or the GPU. + */ + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + count = 0; + for_each_engine(engine, i915, id) { + IGT_TIMEOUT(end_time); + + if (!intel_engine_can_store_dword(engine)) + continue; + + do { + struct intel_timeline *tl; + struct i915_request *rq; + + tl = checked_intel_timeline_create(i915); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto out; + } + + rq = tl_write(tl, engine, count); + if (IS_ERR(rq)) { + intel_timeline_put(tl); + err = PTR_ERR(rq); + goto out; + } + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("Wait for timeline writes timed out!\n"); + intel_timeline_put(tl); + err = -EIO; + goto out; + } + + if (*tl->hwsp_seqno != count) { + pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", + count, *tl->hwsp_seqno); + err = -EINVAL; + } + + intel_timeline_put(tl); + count++; + + if (err) + goto out; + + intel_timelines_park(i915); /* Encourage recycling! 
*/ + } while (!__igt_timeout(end_time, NULL)); + } + +out: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} + +int intel_timeline_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_hwsp_recycle), + SUBTEST(live_hwsp_engine), + SUBTEST(live_hwsp_alternate), + SUBTEST(live_hwsp_wrap), + }; + + if (i915_terminally_wedged(i915)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c new file mode 100644 index 000000000000..5c549205828a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.c @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include "../intel_timeline.h" + +#include "mock_timeline.h" + +void mock_timeline_init(struct intel_timeline *timeline, u64 context) +{ + timeline->gt = NULL; + timeline->fence_context = context; + + mutex_init(&timeline->mutex); + + INIT_ACTIVE_REQUEST(&timeline->last_request); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); + + INIT_LIST_HEAD(&timeline->link); +} + +void mock_timeline_fini(struct intel_timeline *timeline) +{ + i915_syncmap_free(&timeline->sync); +} diff --git a/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h new file mode 100644 index 000000000000..689efc66c908 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftests/mock_timeline.h @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#ifndef __MOCK_TIMELINE__ +#define __MOCK_TIMELINE__ + +struct intel_timeline; + +void mock_timeline_init(struct intel_timeline *timeline, u64 context); +void mock_timeline_fini(struct intel_timeline *timeline); + +#endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b574aea23581..89a21fa4eac2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -89,7 +89,7 @@ #include "i915_gpu_error.h" #include "i915_request.h" #include "i915_scheduler.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 50d7e1e8d8ad..6e07127242d9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -909,7 +909,7 @@ wait_for_timelines(struct drm_i915_private *i915, unsigned int flags, long timeout) { struct i915_gt_timelines *gt = &i915->gt.timelines; - struct i915_timeline *tl; + struct intel_timeline *tl; mutex_lock(>->mutex); list_for_each_entry(tl, >->active_list, link) { @@ -1487,7 +1487,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) dev_priv->mm.unordered_timeline = dma_fence_context_alloc(1); - i915_timelines_init(dev_priv); + intel_timelines_init(dev_priv); ret = i915_gem_init_userptr(dev_priv); if (ret) @@ -1624,7 +1624,7 @@ err_uc_misc: if (ret != -EIO) { i915_gem_cleanup_userptr(dev_priv); - i915_timelines_fini(dev_priv); + intel_timelines_fini(dev_priv); } if (ret == -EIO) { @@ -1688,7 +1688,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv) intel_uc_fini_misc(dev_priv); i915_gem_cleanup_userptr(dev_priv); - i915_timelines_fini(dev_priv); + intel_timelines_fini(dev_priv); 
i915_gem_drain_freed_objects(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index e8b9ebe50c4e..028be3b44d07 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -47,7 +47,7 @@ #include "i915_request.h" #include "i915_scatterlist.h" #include "i915_selftest.h" -#include "i915_timeline.h" +#include "gt/intel_timeline.h" #define I915_GTT_PAGE_SIZE_4K BIT_ULL(12) #define I915_GTT_PAGE_SIZE_64K BIT_ULL(16) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 0c99694faab7..5ff87c4a0cd5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -607,7 +607,7 @@ out: struct i915_request * __i915_request_create(struct intel_context *ce, gfp_t gfp) { - struct i915_timeline *tl = ce->ring->timeline; + struct intel_timeline *tl = ce->ring->timeline; struct i915_request *rq; u32 seqno; int ret; @@ -656,7 +656,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) } } - ret = i915_timeline_get_seqno(tl, rq, &seqno); + ret = intel_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; @@ -775,7 +775,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) return 0; signal = list_prev_entry(signal, ring_link); - if (i915_timeline_sync_is_later(rq->timeline, &signal->fence)) + if (intel_timeline_sync_is_later(rq->timeline, &signal->fence)) return 0; return i915_sw_fence_await_dma_fence(&rq->submit, @@ -829,7 +829,7 @@ emit_semaphore_wait(struct i915_request *to, return err; /* We need to pin the signaler's HWSP until we are finished reading. */ - err = i915_timeline_read_hwsp(from, to, &hwsp_offset); + err = intel_timeline_read_hwsp(from, to, &hwsp_offset); if (err) return err; @@ -940,7 +940,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context != rq->i915->mm.unordered_timeline && - i915_timeline_sync_is_later(rq->timeline, fence)) + intel_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) @@ -954,7 +954,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Record the latest fence used against each timeline */ if (fence->context != rq->i915->mm.unordered_timeline) - i915_timeline_sync_set(rq->timeline, fence); + intel_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; @@ -1092,7 +1092,7 @@ void i915_request_skip(struct i915_request *rq, int error) static struct i915_request * __i915_request_add_to_timeline(struct i915_request *rq) { - struct i915_timeline *timeline = rq->timeline; + struct intel_timeline *timeline = rq->timeline; struct i915_request *prev; /* diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index bebc1e9b4a5e..b58ceef92e20 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -41,8 +41,8 @@ struct drm_file; struct drm_i915_gem_object; struct i915_request; -struct i915_timeline; -struct i915_timeline_cacheline; +struct intel_timeline; +struct intel_timeline_cacheline; struct i915_capture_list { struct i915_capture_list *next; @@ -113,7 +113,7 @@ struct i915_request { struct intel_engine_cs *engine; struct intel_context *hw_context; struct intel_ring *ring; - struct i915_timeline *timeline; + struct intel_timeline *timeline; struct list_head signal_link; /* @@ -176,7 +176,7 @@ struct i915_request { * inside the timeline's HWSP 
vma, but it is only valid while this * request has not completed and guarded by the timeline mutex. */ - struct i915_timeline_cacheline *hwsp_cacheline; + struct intel_timeline_cacheline *hwsp_cacheline; /** Position in the ring of the start of the request */ u32 head; diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c deleted file mode 100644 index 3e2c3169dc69..000000000000 --- a/drivers/gpu/drm/i915/i915_timeline.c +++ /dev/null @@ -1,591 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2016-2018 Intel Corporation - */ - -#include "gt/intel_gt_types.h" - -#include "i915_drv.h" - -#include "i915_active.h" -#include "i915_syncmap.h" -#include "i915_timeline.h" - -#define ptr_set_bit(ptr, bit) ((typeof(ptr))((unsigned long)(ptr) | BIT(bit))) -#define ptr_test_bit(ptr, bit) ((unsigned long)(ptr) & BIT(bit)) - -struct i915_timeline_hwsp { - struct intel_gt *gt; - struct i915_gt_timelines *gt_timelines; - struct list_head free_link; - struct i915_vma *vma; - u64 free_bitmap; -}; - -struct i915_timeline_cacheline { - struct i915_active active; - struct i915_timeline_hwsp *hwsp; - void *vaddr; -#define CACHELINE_BITS 6 -#define CACHELINE_FREE CACHELINE_BITS -}; - -static struct i915_vma *__hwsp_alloc(struct intel_gt *gt) -{ - struct drm_i915_private *i915 = gt->i915; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - - obj = i915_gem_object_create_internal(i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - vma = i915_vma_instance(obj, >->ggtt->vm, NULL); - if (IS_ERR(vma)) - i915_gem_object_put(obj); - - return vma; -} - -static struct i915_vma * -hwsp_alloc(struct i915_timeline *timeline, unsigned int *cacheline) -{ - struct i915_gt_timelines *gt = &timeline->gt->timelines; - struct i915_timeline_hwsp *hwsp; - - BUILD_BUG_ON(BITS_PER_TYPE(u64) * CACHELINE_BYTES > PAGE_SIZE); - - spin_lock_irq(>->hwsp_lock); - - /* hwsp_free_list only contains HWSP that have available cachelines */ - hwsp = list_first_entry_or_null(>->hwsp_free_list, - typeof(*hwsp), free_link); - if (!hwsp) { - struct i915_vma *vma; - - spin_unlock_irq(>->hwsp_lock); - - hwsp = kmalloc(sizeof(*hwsp), GFP_KERNEL); - if (!hwsp) - return ERR_PTR(-ENOMEM); - - vma = __hwsp_alloc(timeline->gt); - if (IS_ERR(vma)) { - kfree(hwsp); - return vma; - } - - vma->private = hwsp; - hwsp->gt = timeline->gt; - hwsp->vma = vma; - hwsp->free_bitmap = ~0ull; - hwsp->gt_timelines = gt; - - spin_lock_irq(>->hwsp_lock); - list_add(&hwsp->free_link, >->hwsp_free_list); - } - - GEM_BUG_ON(!hwsp->free_bitmap); - *cacheline = __ffs64(hwsp->free_bitmap); - hwsp->free_bitmap &= ~BIT_ULL(*cacheline); - if (!hwsp->free_bitmap) - list_del(&hwsp->free_link); - - spin_unlock_irq(>->hwsp_lock); - - GEM_BUG_ON(hwsp->vma->private != hwsp); - return hwsp->vma; -} - -static void __idle_hwsp_free(struct i915_timeline_hwsp *hwsp, int cacheline) -{ - struct i915_gt_timelines *gt = hwsp->gt_timelines; - unsigned long flags; - - spin_lock_irqsave(>->hwsp_lock, flags); - - /* As a cacheline becomes available, publish the HWSP on the freelist */ - if (!hwsp->free_bitmap) - list_add_tail(&hwsp->free_link, >->hwsp_free_list); - - GEM_BUG_ON(cacheline >= BITS_PER_TYPE(hwsp->free_bitmap)); - hwsp->free_bitmap |= BIT_ULL(cacheline); - - /* And if no one is left using it, give the page back to the system */ - if (hwsp->free_bitmap == ~0ull) { - i915_vma_put(hwsp->vma); - list_del(&hwsp->free_link); - kfree(hwsp); - } - - 
spin_unlock_irqrestore(>->hwsp_lock, flags); -} - -static void __idle_cacheline_free(struct i915_timeline_cacheline *cl) -{ - GEM_BUG_ON(!i915_active_is_idle(&cl->active)); - - i915_gem_object_unpin_map(cl->hwsp->vma->obj); - i915_vma_put(cl->hwsp->vma); - __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); - - i915_active_fini(&cl->active); - kfree(cl); -} - -static void __cacheline_retire(struct i915_active *active) -{ - struct i915_timeline_cacheline *cl = - container_of(active, typeof(*cl), active); - - i915_vma_unpin(cl->hwsp->vma); - if (ptr_test_bit(cl->vaddr, CACHELINE_FREE)) - __idle_cacheline_free(cl); -} - -static struct i915_timeline_cacheline * -cacheline_alloc(struct i915_timeline_hwsp *hwsp, unsigned int cacheline) -{ - struct i915_timeline_cacheline *cl; - void *vaddr; - - GEM_BUG_ON(cacheline >= BIT(CACHELINE_BITS)); - - cl = kmalloc(sizeof(*cl), GFP_KERNEL); - if (!cl) - return ERR_PTR(-ENOMEM); - - vaddr = i915_gem_object_pin_map(hwsp->vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - kfree(cl); - return ERR_CAST(vaddr); - } - - i915_vma_get(hwsp->vma); - cl->hwsp = hwsp; - cl->vaddr = page_pack_bits(vaddr, cacheline); - - i915_active_init(hwsp->gt->i915, &cl->active, __cacheline_retire); - - return cl; -} - -static void cacheline_acquire(struct i915_timeline_cacheline *cl) -{ - if (cl && i915_active_acquire(&cl->active)) - __i915_vma_pin(cl->hwsp->vma); -} - -static void cacheline_release(struct i915_timeline_cacheline *cl) -{ - if (cl) - i915_active_release(&cl->active); -} - -static void cacheline_free(struct i915_timeline_cacheline *cl) -{ - GEM_BUG_ON(ptr_test_bit(cl->vaddr, CACHELINE_FREE)); - cl->vaddr = ptr_set_bit(cl->vaddr, CACHELINE_FREE); - - if (i915_active_is_idle(&cl->active)) - __idle_cacheline_free(cl); -} - -int i915_timeline_init(struct i915_timeline *timeline, - struct intel_gt *gt, - struct i915_vma *hwsp) -{ - void *vaddr; - - /* - * Ideally we want a set of engines on a single leaf as we expect - * to mostly be tracking synchronisation between engines. It is not - * a huge issue if this is not the case, but we may want to mitigate - * any page crossing penalties if they become an issue. - * - * Called during early_init before we know how many engines there are. 
- */ - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); - - timeline->gt = gt; - timeline->pin_count = 0; - timeline->has_initial_breadcrumb = !hwsp; - timeline->hwsp_cacheline = NULL; - - if (!hwsp) { - struct i915_timeline_cacheline *cl; - unsigned int cacheline; - - hwsp = hwsp_alloc(timeline, &cacheline); - if (IS_ERR(hwsp)) - return PTR_ERR(hwsp); - - cl = cacheline_alloc(hwsp->private, cacheline); - if (IS_ERR(cl)) { - __idle_hwsp_free(hwsp->private, cacheline); - return PTR_ERR(cl); - } - - timeline->hwsp_cacheline = cl; - timeline->hwsp_offset = cacheline * CACHELINE_BYTES; - - vaddr = page_mask_bits(cl->vaddr); - } else { - timeline->hwsp_offset = I915_GEM_HWS_SEQNO_ADDR; - - vaddr = i915_gem_object_pin_map(hwsp->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) - return PTR_ERR(vaddr); - } - - timeline->hwsp_seqno = - memset(vaddr + timeline->hwsp_offset, 0, CACHELINE_BYTES); - - timeline->hwsp_ggtt = i915_vma_get(hwsp); - GEM_BUG_ON(timeline->hwsp_offset >= hwsp->size); - - timeline->fence_context = dma_fence_context_alloc(1); - - mutex_init(&timeline->mutex); - - INIT_ACTIVE_REQUEST(&timeline->last_request); - INIT_LIST_HEAD(&timeline->requests); - - i915_syncmap_init(&timeline->sync); - - return 0; -} - -static void timelines_init(struct intel_gt *gt) -{ - struct i915_gt_timelines *timelines = >->timelines; - - mutex_init(&timelines->mutex); - INIT_LIST_HEAD(&timelines->active_list); - - spin_lock_init(&timelines->hwsp_lock); - INIT_LIST_HEAD(&timelines->hwsp_free_list); - - /* via i915_gem_wait_for_idle() */ - i915_gem_shrinker_taints_mutex(gt->i915, &timelines->mutex); -} - -void i915_timelines_init(struct drm_i915_private *i915) -{ - timelines_init(&i915->gt); -} - -static void timeline_add_to_active(struct i915_timeline *tl) -{ - struct i915_gt_timelines *gt = &tl->gt->timelines; - - mutex_lock(>->mutex); - list_add(&tl->link, >->active_list); - mutex_unlock(>->mutex); -} - -static void timeline_remove_from_active(struct i915_timeline *tl) -{ - struct i915_gt_timelines *gt = &tl->gt->timelines; - - mutex_lock(>->mutex); - list_del(&tl->link); - mutex_unlock(>->mutex); -} - -static void timelines_park(struct intel_gt *gt) -{ - struct i915_gt_timelines *timelines = >->timelines; - struct i915_timeline *timeline; - - mutex_lock(&timelines->mutex); - list_for_each_entry(timeline, &timelines->active_list, link) { - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. - */ - i915_syncmap_free(&timeline->sync); - } - mutex_unlock(&timelines->mutex); -} - -/** - * i915_timelines_park - called when the driver idles - * @i915: the drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. 
- */ -void i915_timelines_park(struct drm_i915_private *i915) -{ - timelines_park(&i915->gt); -} - -void i915_timeline_fini(struct i915_timeline *timeline) -{ - GEM_BUG_ON(timeline->pin_count); - GEM_BUG_ON(!list_empty(&timeline->requests)); - - i915_syncmap_free(&timeline->sync); - - if (timeline->hwsp_cacheline) - cacheline_free(timeline->hwsp_cacheline); - else - i915_gem_object_unpin_map(timeline->hwsp_ggtt->obj); - - i915_vma_put(timeline->hwsp_ggtt); -} - -struct i915_timeline * -i915_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp) -{ - struct i915_timeline *timeline; - int err; - - timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); - if (!timeline) - return ERR_PTR(-ENOMEM); - - err = i915_timeline_init(timeline, gt, global_hwsp); - if (err) { - kfree(timeline); - return ERR_PTR(err); - } - - kref_init(&timeline->kref); - - return timeline; -} - -int i915_timeline_pin(struct i915_timeline *tl) -{ - int err; - - if (tl->pin_count++) - return 0; - GEM_BUG_ON(!tl->pin_count); - - err = i915_vma_pin(tl->hwsp_ggtt, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (err) - goto unpin; - - tl->hwsp_offset = - i915_ggtt_offset(tl->hwsp_ggtt) + - offset_in_page(tl->hwsp_offset); - - cacheline_acquire(tl->hwsp_cacheline); - timeline_add_to_active(tl); - - return 0; - -unpin: - tl->pin_count = 0; - return err; -} - -static u32 timeline_advance(struct i915_timeline *tl) -{ - GEM_BUG_ON(!tl->pin_count); - GEM_BUG_ON(tl->seqno & tl->has_initial_breadcrumb); - - return tl->seqno += 1 + tl->has_initial_breadcrumb; -} - -static void timeline_rollback(struct i915_timeline *tl) -{ - tl->seqno -= 1 + tl->has_initial_breadcrumb; -} - -static noinline int -__i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno) -{ - struct i915_timeline_cacheline *cl; - unsigned int cacheline; - struct i915_vma *vma; - void *vaddr; - int err; - - /* - * If there is an outstanding GPU reference to this cacheline, - * such as it being sampled by a HW semaphore on another timeline, - * we cannot wraparound our seqno value (the HW semaphore does - * a strict greater-than-or-equals compare, not i915_seqno_passed). - * So if the cacheline is still busy, we must detach ourselves - * from it and leave it inflight alongside its users. - * - * However, if nobody is watching and we can guarantee that nobody - * will, we could simply reuse the same cacheline. - * - * if (i915_active_request_is_signaled(&tl->last_request) && - * i915_active_is_signaled(&tl->hwsp_cacheline->active)) - * return 0; - * - * That seems unlikely for a busy timeline that needed to wrap in - * the first place, so just replace the cacheline. - */ - - vma = hwsp_alloc(tl, &cacheline); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_rollback; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); - if (err) { - __idle_hwsp_free(vma->private, cacheline); - goto err_rollback; - } - - cl = cacheline_alloc(vma->private, cacheline); - if (IS_ERR(cl)) { - err = PTR_ERR(cl); - __idle_hwsp_free(vma->private, cacheline); - goto err_unpin; - } - GEM_BUG_ON(cl->hwsp->vma != vma); - - /* - * Attach the old cacheline to the current request, so that we only - * free it after the current request is retired, which ensures that - * all writes into the cacheline from previous requests are complete. 
- */ - err = i915_active_ref(&tl->hwsp_cacheline->active, - tl->fence_context, rq); - if (err) - goto err_cacheline; - - cacheline_release(tl->hwsp_cacheline); /* ownership now xfered to rq */ - cacheline_free(tl->hwsp_cacheline); - - i915_vma_unpin(tl->hwsp_ggtt); /* binding kept alive by old cacheline */ - i915_vma_put(tl->hwsp_ggtt); - - tl->hwsp_ggtt = i915_vma_get(vma); - - vaddr = page_mask_bits(cl->vaddr); - tl->hwsp_offset = cacheline * CACHELINE_BYTES; - tl->hwsp_seqno = - memset(vaddr + tl->hwsp_offset, 0, CACHELINE_BYTES); - - tl->hwsp_offset += i915_ggtt_offset(vma); - - cacheline_acquire(cl); - tl->hwsp_cacheline = cl; - - *seqno = timeline_advance(tl); - GEM_BUG_ON(i915_seqno_passed(*tl->hwsp_seqno, *seqno)); - return 0; - -err_cacheline: - cacheline_free(cl); -err_unpin: - i915_vma_unpin(vma); -err_rollback: - timeline_rollback(tl); - return err; -} - -int i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno) -{ - *seqno = timeline_advance(tl); - - /* Replace the HWSP on wraparound for HW semaphores */ - if (unlikely(!*seqno && tl->hwsp_cacheline)) - return __i915_timeline_get_seqno(tl, rq, seqno); - - return 0; -} - -static int cacheline_ref(struct i915_timeline_cacheline *cl, - struct i915_request *rq) -{ - return i915_active_ref(&cl->active, rq->fence.context, rq); -} - -int i915_timeline_read_hwsp(struct i915_request *from, - struct i915_request *to, - u32 *hwsp) -{ - struct i915_timeline_cacheline *cl = from->hwsp_cacheline; - struct i915_timeline *tl = from->timeline; - int err; - - GEM_BUG_ON(to->timeline == tl); - - mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING); - err = i915_request_completed(from); - if (!err) - err = cacheline_ref(cl, to); - if (!err) { - if (likely(cl == tl->hwsp_cacheline)) { - *hwsp = tl->hwsp_offset; - } else { /* across a seqno wrap, recover the original offset */ - *hwsp = i915_ggtt_offset(cl->hwsp->vma) + - ptr_unmask_bits(cl->vaddr, CACHELINE_BITS) * - CACHELINE_BYTES; - } - } - mutex_unlock(&tl->mutex); - - return err; -} - -void i915_timeline_unpin(struct i915_timeline *tl) -{ - GEM_BUG_ON(!tl->pin_count); - if (--tl->pin_count) - return; - - timeline_remove_from_active(tl); - cacheline_release(tl->hwsp_cacheline); - - /* - * Since this timeline is idle, all bariers upon which we were waiting - * must also be complete and so we can discard the last used barriers - * without loss of information. 
- */ - i915_syncmap_free(&tl->sync); - - __i915_vma_unpin(tl->hwsp_ggtt); -} - -void __i915_timeline_free(struct kref *kref) -{ - struct i915_timeline *timeline = - container_of(kref, typeof(*timeline), kref); - - i915_timeline_fini(timeline); - kfree(timeline); -} - -static void timelines_fini(struct intel_gt *gt) -{ - struct i915_gt_timelines *timelines = >->timelines; - - GEM_BUG_ON(!list_empty(&timelines->active_list)); - GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); - - mutex_destroy(&timelines->mutex); -} - -void i915_timelines_fini(struct drm_i915_private *i915) -{ - timelines_fini(&i915->gt); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_timeline.c" -#include "selftests/i915_timeline.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h deleted file mode 100644 index a454d49f229f..000000000000 --- a/drivers/gpu/drm/i915/i915_timeline.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - */ - -#ifndef I915_TIMELINE_H -#define I915_TIMELINE_H - -#include - -#include "i915_active.h" -#include "i915_syncmap.h" -#include "i915_timeline_types.h" - -int i915_timeline_init(struct i915_timeline *tl, - struct intel_gt *gt, - struct i915_vma *hwsp); -void i915_timeline_fini(struct i915_timeline *tl); - -struct i915_timeline * -i915_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp); - -static inline struct i915_timeline * -i915_timeline_get(struct i915_timeline *timeline) -{ - kref_get(&timeline->kref); - return timeline; -} - -void __i915_timeline_free(struct kref *kref); -static inline void i915_timeline_put(struct i915_timeline *timeline) -{ - kref_put(&timeline->kref, __i915_timeline_free); -} - -static inline int __i915_timeline_sync_set(struct i915_timeline *tl, - u64 context, u32 seqno) -{ - return i915_syncmap_set(&tl->sync, context, seqno); -} - -static inline int i915_timeline_sync_set(struct i915_timeline *tl, - const struct dma_fence *fence) -{ - return __i915_timeline_sync_set(tl, fence->context, fence->seqno); -} - -static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, - u64 context, u32 seqno) -{ - return i915_syncmap_is_later(&tl->sync, context, seqno); -} - -static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, - const struct dma_fence *fence) -{ - return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); -} - -int i915_timeline_pin(struct i915_timeline *tl); -int i915_timeline_get_seqno(struct i915_timeline *tl, - struct i915_request *rq, - u32 *seqno); -void i915_timeline_unpin(struct i915_timeline *tl); - -int i915_timeline_read_hwsp(struct i915_request *from, - struct i915_request *until, - u32 *hwsp_offset); - -void i915_timelines_init(struct drm_i915_private *i915); -void i915_timelines_park(struct drm_i915_private *i915); -void i915_timelines_fini(struct drm_i915_private *i915); - -#endif diff --git a/drivers/gpu/drm/i915/i915_timeline_types.h b/drivers/gpu/drm/i915/i915_timeline_types.h deleted file mode 100644 index 931585e12d41..000000000000 --- a/drivers/gpu/drm/i915/i915_timeline_types.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2016 Intel Corporation - */ - -#ifndef __I915_TIMELINE_TYPES_H__ -#define __I915_TIMELINE_TYPES_H__ - -#include -#include -#include -#include - -#include "i915_active_types.h" - -struct drm_i915_private; -struct i915_vma; -struct i915_timeline_cacheline; -struct i915_syncmap; - -struct i915_timeline { - u64 fence_context; - u32 seqno; - - struct mutex mutex; /* protects the flow of requests */ - - unsigned int pin_count; - const u32 *hwsp_seqno; - struct i915_vma *hwsp_ggtt; - u32 hwsp_offset; - - struct i915_timeline_cacheline *hwsp_cacheline; - - bool has_initial_breadcrumb; - - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. - */ - struct list_head requests; - - /* Contains an RCU guarded pointer to the last request. No reference is - * held to the request, users must carefully acquire a reference to - * the request using i915_active_request_get_request_rcu(), or hold the - * struct_mutex. - */ - struct i915_active_request last_request; - - /** - * We track the most recent seqno that we wait on in every context so - * that we only have to emit a new await and dependency on a more - * recent sync point. As the contexts may be executed out-of-order, we - * have to track each individually and can not rely on an absolute - * global_seqno. 
When we know that all tracked fences are completed - * (i.e. when the driver is idle), we know that the syncmap is - * redundant and we can discard it without loss of generality. - */ - struct i915_syncmap *sync; - - struct list_head link; - struct intel_gt *gt; - - struct kref kref; -}; - -#endif /* __I915_TIMELINE_TYPES_H__ */ diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index d5dc4427d664..2b31a4ee0b4c 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -12,7 +12,7 @@ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) selftest(workarounds, intel_workarounds_live_selftests) -selftest(timelines, i915_timeline_live_selftests) +selftest(timelines, intel_timeline_live_selftests) selftest(requests, i915_request_live_selftests) selftest(active, i915_active_live_selftests) selftest(objects, i915_gem_object_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 510eb176bb2c..b55da4d9ccba 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -15,7 +15,7 @@ selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) selftest(engine, intel_engine_cs_mock_selftests) -selftest(timelines, i915_timeline_mock_selftests) +selftest(timelines, intel_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) selftest(objects, i915_gem_object_mock_selftests) selftest(phys, i915_gem_phys_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c deleted file mode 100644 index 44d031446f08..000000000000 --- a/drivers/gpu/drm/i915/selftests/i915_timeline.c +++ /dev/null @@ -1,845 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2017-2018 Intel Corporation - */ - -#include - -#include "gem/i915_gem_pm.h" - -#include "i915_random.h" -#include "i915_selftest.h" - -#include "igt_flush_test.h" -#include "mock_gem_device.h" -#include "mock_timeline.h" - -static struct page *hwsp_page(struct i915_timeline *tl) -{ - struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj; - - GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - return sg_page(obj->mm.pages->sgl); -} - -static unsigned long hwsp_cacheline(struct i915_timeline *tl) -{ - unsigned long address = (unsigned long)page_address(hwsp_page(tl)); - - return (address + tl->hwsp_offset) / CACHELINE_BYTES; -} - -#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES) - -struct mock_hwsp_freelist { - struct drm_i915_private *i915; - struct radix_tree_root cachelines; - struct i915_timeline **history; - unsigned long count, max; - struct rnd_state prng; -}; - -enum { - SHUFFLE = BIT(0), -}; - -static void __mock_hwsp_record(struct mock_hwsp_freelist *state, - unsigned int idx, - struct i915_timeline *tl) -{ - tl = xchg(&state->history[idx], tl); - if (tl) { - radix_tree_delete(&state->cachelines, hwsp_cacheline(tl)); - i915_timeline_put(tl); - } -} - -static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state, - unsigned int count, - unsigned int flags) -{ - struct i915_timeline *tl; - unsigned int idx; - - while (count--) { - unsigned long cacheline; - int err; - - tl = 
i915_timeline_create(&state->i915->gt, NULL); - if (IS_ERR(tl)) - return PTR_ERR(tl); - - cacheline = hwsp_cacheline(tl); - err = radix_tree_insert(&state->cachelines, cacheline, tl); - if (err) { - if (err == -EEXIST) { - pr_err("HWSP cacheline %lu already used; duplicate allocation!\n", - cacheline); - } - i915_timeline_put(tl); - return err; - } - - idx = state->count++ % state->max; - __mock_hwsp_record(state, idx, tl); - } - - if (flags & SHUFFLE) - i915_prandom_shuffle(state->history, - sizeof(*state->history), - min(state->count, state->max), - &state->prng); - - count = i915_prandom_u32_max_state(min(state->count, state->max), - &state->prng); - while (count--) { - idx = --state->count % state->max; - __mock_hwsp_record(state, idx, NULL); - } - - return 0; -} - -static int mock_hwsp_freelist(void *arg) -{ - struct mock_hwsp_freelist state; - const struct { - const char *name; - unsigned int flags; - } phases[] = { - { "linear", 0 }, - { "shuffled", SHUFFLE }, - { }, - }, *p; - unsigned int na; - int err = 0; - - INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL); - state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed); - - state.i915 = mock_gem_device(); - if (!state.i915) - return -ENOMEM; - - /* - * Create a bunch of timelines and check that their HWSP do not overlap. - * Free some, and try again. - */ - - state.max = PAGE_SIZE / sizeof(*state.history); - state.count = 0; - state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL); - if (!state.history) { - err = -ENOMEM; - goto err_put; - } - - mutex_lock(&state.i915->drm.struct_mutex); - for (p = phases; p->name; p++) { - pr_debug("%s(%s)\n", __func__, p->name); - for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) { - err = __mock_hwsp_timeline(&state, na, p->flags); - if (err) - goto out; - } - } - -out: - for (na = 0; na < state.max; na++) - __mock_hwsp_record(&state, na, NULL); - mutex_unlock(&state.i915->drm.struct_mutex); - kfree(state.history); -err_put: - drm_dev_put(&state.i915->drm); - return err; -} - -struct __igt_sync { - const char *name; - u32 seqno; - bool expected; - bool set; -}; - -static int __igt_sync(struct i915_timeline *tl, - u64 ctx, - const struct __igt_sync *p, - const char *name) -{ - int ret; - - if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { - pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", - name, p->name, ctx, p->seqno, yesno(p->expected)); - return -EINVAL; - } - - if (p->set) { - ret = __i915_timeline_sync_set(tl, ctx, p->seqno); - if (ret) - return ret; - } - - return 0; -} - -static int igt_sync(void *arg) -{ - const struct __igt_sync pass[] = { - { "unset", 0, false, false }, - { "new", 0, false, true }, - { "0a", 0, true, true }, - { "1a", 1, false, true }, - { "1b", 1, true, true }, - { "0b", 0, true, false }, - { "2a", 2, false, true }, - { "4", 4, false, true }, - { "INT_MAX", INT_MAX, false, true }, - { "INT_MAX-1", INT_MAX-1, true, false }, - { "INT_MAX+1", (u32)INT_MAX+1, false, true }, - { "INT_MAX", INT_MAX, true, false }, - { "UINT_MAX", UINT_MAX, false, true }, - { "wrap", 0, false, true }, - { "unwrap", UINT_MAX, true, false }, - {}, - }, *p; - struct i915_timeline tl; - int order, offset; - int ret = -ENODEV; - - mock_timeline_init(&tl, 0); - for (p = pass; p->name; p++) { - for (order = 1; order < 64; order++) { - for (offset = -1; offset <= (order > 1); offset++) { - u64 ctx = BIT_ULL(order) + offset; - - ret = __igt_sync(&tl, ctx, p, "1"); - if (ret) - goto out; - } - } - } - mock_timeline_fini(&tl); - 
- mock_timeline_init(&tl, 0); - for (order = 1; order < 64; order++) { - for (offset = -1; offset <= (order > 1); offset++) { - u64 ctx = BIT_ULL(order) + offset; - - for (p = pass; p->name; p++) { - ret = __igt_sync(&tl, ctx, p, "2"); - if (ret) - goto out; - } - } - } - -out: - mock_timeline_fini(&tl); - return ret; -} - -static unsigned int random_engine(struct rnd_state *rnd) -{ - return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd); -} - -static int bench_sync(void *arg) -{ - struct rnd_state prng; - struct i915_timeline tl; - unsigned long end_time, count; - u64 prng32_1M; - ktime_t kt; - int order, last_order; - - mock_timeline_init(&tl, 0); - - /* Lookups from cache are very fast and so the random number generation - * and the loop itself becomes a significant factor in the per-iteration - * timings. We try to compensate the results by measuring the overhead - * of the prng and subtract it from the reported results. - */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u32 x; - - /* Make sure the compiler doesn't optimise away the prng call */ - WRITE_ONCE(x, prandom_u32_state(&prng)); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_debug("%s: %lu random evaluations, %lluns/prng\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count); - - /* Benchmark (only) setting random context ids */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u64 id = i915_prandom_u64_state(&prng); - - __i915_timeline_sync_set(&tl, id, 0); - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu random insertions, %lluns/insert\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - /* Benchmark looking up the exact same context ids as we just set */ - prandom_seed_state(&prng, i915_selftest.random_seed); - end_time = count; - kt = ktime_get(); - while (end_time--) { - u64 id = i915_prandom_u64_state(&prng); - - if (!__i915_timeline_sync_is_later(&tl, id, 0)) { - mock_timeline_fini(&tl); - pr_err("Lookup of %llu failed\n", id); - return -EINVAL; - } - } - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu random lookups, %lluns/lookup\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - mock_timeline_fini(&tl); - cond_resched(); - - mock_timeline_init(&tl, 0); - - /* Benchmark setting the first N (in order) contexts */ - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - __i915_timeline_sync_set(&tl, count++, 0); - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu in-order insertions, %lluns/insert\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - /* Benchmark looking up the exact same context ids as we just set */ - end_time = count; - kt = ktime_get(); - while (end_time--) { - if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { - pr_err("Lookup of %lu failed\n", end_time); - mock_timeline_fini(&tl); - return -EINVAL; - } - } - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu in-order lookups, %lluns/lookup\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - - mock_timeline_fini(&tl); - cond_resched(); - - 
mock_timeline_init(&tl, 0); - - /* Benchmark searching for a random context id and maybe changing it */ - prandom_seed_state(&prng, i915_selftest.random_seed); - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - u32 id = random_engine(&prng); - u32 seqno = prandom_u32_state(&prng); - - if (!__i915_timeline_sync_is_later(&tl, id, seqno)) - __i915_timeline_sync_set(&tl, id, seqno); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); - pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", - __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_fini(&tl); - cond_resched(); - - /* Benchmark searching for a known context id and changing the seqno */ - for (last_order = 1, order = 1; order < 32; - ({ int tmp = last_order; last_order = order; order += tmp; })) { - unsigned int mask = BIT(order) - 1; - - mock_timeline_init(&tl, 0); - - count = 0; - kt = ktime_get(); - end_time = jiffies + HZ/10; - do { - /* Without assuming too many details of the underlying - * implementation, try to identify its phase-changes - * (if any)! - */ - u64 id = (u64)(count & mask) << order; - - __i915_timeline_sync_is_later(&tl, id, 0); - __i915_timeline_sync_set(&tl, id, 0); - - count++; - } while (!time_after(jiffies, end_time)); - kt = ktime_sub(ktime_get(), kt); - pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", - __func__, count, order, - (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_fini(&tl); - cond_resched(); - } - - return 0; -} - -int i915_timeline_mock_selftests(void) -{ - static const struct i915_subtest tests[] = { - SUBTEST(mock_hwsp_freelist), - SUBTEST(igt_sync), - SUBTEST(bench_sync), - }; - - return i915_subtests(tests, NULL); -} - -static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (INTEL_GEN(rq->i915) >= 8) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = addr; - *cs++ = 0; - *cs++ = value; - } else if (INTEL_GEN(rq->i915) >= 4) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = 0; - *cs++ = addr; - *cs++ = value; - } else { - *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; - *cs++ = addr; - *cs++ = value; - *cs++ = MI_NOOP; - } - - intel_ring_advance(rq, cs); - - return 0; -} - -static struct i915_request * -tl_write(struct i915_timeline *tl, struct intel_engine_cs *engine, u32 value) -{ - struct i915_request *rq; - int err; - - lockdep_assert_held(&tl->gt->i915->drm.struct_mutex); /* lazy rq refs */ - - err = i915_timeline_pin(tl); - if (err) { - rq = ERR_PTR(err); - goto out; - } - - rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) - goto out_unpin; - - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value); - i915_request_add(rq); - if (err) - rq = ERR_PTR(err); - -out_unpin: - i915_timeline_unpin(tl); -out: - if (IS_ERR(rq)) - pr_err("Failed to write to timeline!\n"); - return rq; -} - -static struct i915_timeline * -checked_i915_timeline_create(struct drm_i915_private *i915) -{ - struct i915_timeline *tl; - - tl = i915_timeline_create(&i915->gt, NULL); - if (IS_ERR(tl)) - return tl; - - if (*tl->hwsp_seqno != tl->seqno) { - pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n", - *tl->hwsp_seqno, tl->seqno); - i915_timeline_put(tl); - return ERR_PTR(-EINVAL); - } - - return tl; -} - -static int live_hwsp_engine(void *arg) -{ -#define NUM_TIMELINES 4096 - 
struct drm_i915_private *i915 = arg; - struct i915_timeline **timelines; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - unsigned long count, n; - int err = 0; - - /* - * Create a bunch of timelines and check we can write - * independently to each of their breadcrumb slots. - */ - - timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, - sizeof(*timelines), - GFP_KERNEL); - if (!timelines) - return -ENOMEM; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - count = 0; - for_each_engine(engine, i915, id) { - if (!intel_engine_can_store_dword(engine)) - continue; - - for (n = 0; n < NUM_TIMELINES; n++) { - struct i915_timeline *tl; - struct i915_request *rq; - - tl = checked_i915_timeline_create(i915); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out; - } - - rq = tl_write(tl, engine, count); - if (IS_ERR(rq)) { - i915_timeline_put(tl); - err = PTR_ERR(rq); - goto out; - } - - timelines[count++] = tl; - } - } - -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - - for (n = 0; n < count; n++) { - struct i915_timeline *tl = timelines[n]; - - if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", - n, *tl->hwsp_seqno); - err = -EINVAL; - } - i915_timeline_put(tl); - } - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - kvfree(timelines); - - return err; -#undef NUM_TIMELINES -} - -static int live_hwsp_alternate(void *arg) -{ -#define NUM_TIMELINES 4096 - struct drm_i915_private *i915 = arg; - struct i915_timeline **timelines; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - unsigned long count, n; - int err = 0; - - /* - * Create a bunch of timelines and check we can write - * independently to each of their breadcrumb slots with adjacent - * engines. - */ - - timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES, - sizeof(*timelines), - GFP_KERNEL); - if (!timelines) - return -ENOMEM; - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - count = 0; - for (n = 0; n < NUM_TIMELINES; n++) { - for_each_engine(engine, i915, id) { - struct i915_timeline *tl; - struct i915_request *rq; - - if (!intel_engine_can_store_dword(engine)) - continue; - - tl = checked_i915_timeline_create(i915); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out; - } - - rq = tl_write(tl, engine, count); - if (IS_ERR(rq)) { - i915_timeline_put(tl); - err = PTR_ERR(rq); - goto out; - } - - timelines[count++] = tl; - } - } - -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - - for (n = 0; n < count; n++) { - struct i915_timeline *tl = timelines[n]; - - if (!err && *tl->hwsp_seqno != n) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", - n, *tl->hwsp_seqno); - err = -EINVAL; - } - i915_timeline_put(tl); - } - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - kvfree(timelines); - - return err; -#undef NUM_TIMELINES -} - -static int live_hwsp_wrap(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - struct i915_timeline *tl; - enum intel_engine_id id; - intel_wakeref_t wakeref; - int err = 0; - - /* - * Across a seqno wrap, we need to keep the old cacheline alive for - * foreign GPU references. 
- */ - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - tl = i915_timeline_create(&i915->gt, NULL); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out_rpm; - } - if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline) - goto out_free; - - err = i915_timeline_pin(tl); - if (err) - goto out_free; - - for_each_engine(engine, i915, id) { - const u32 *hwsp_seqno[2]; - struct i915_request *rq; - u32 seqno[2]; - - if (!intel_engine_can_store_dword(engine)) - continue; - - rq = i915_request_create(engine->kernel_context); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - tl->seqno = -4u; - - err = i915_timeline_get_seqno(tl, rq, &seqno[0]); - if (err) { - i915_request_add(rq); - goto out; - } - pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n", - seqno[0], tl->hwsp_offset); - - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]); - if (err) { - i915_request_add(rq); - goto out; - } - hwsp_seqno[0] = tl->hwsp_seqno; - - err = i915_timeline_get_seqno(tl, rq, &seqno[1]); - if (err) { - i915_request_add(rq); - goto out; - } - pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n", - seqno[1], tl->hwsp_offset); - - err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]); - if (err) { - i915_request_add(rq); - goto out; - } - hwsp_seqno[1] = tl->hwsp_seqno; - - /* With wrap should come a new hwsp */ - GEM_BUG_ON(seqno[1] >= seqno[0]); - GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]); - - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - pr_err("Wait for timeline writes timed out!\n"); - err = -EIO; - goto out; - } - - if (*hwsp_seqno[0] != seqno[0] || *hwsp_seqno[1] != seqno[1]) { - pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n", - *hwsp_seqno[0], *hwsp_seqno[1], - seqno[0], seqno[1]); - err = -EINVAL; - goto out; - } - - i915_retire_requests(i915); /* recycle HWSP */ - } - -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - - i915_timeline_unpin(tl); -out_free: - i915_timeline_put(tl); -out_rpm: - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - return err; -} - -static int live_hwsp_recycle(void *arg) -{ - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - intel_wakeref_t wakeref; - unsigned long count; - int err = 0; - - /* - * Check seqno writes into one timeline at a time. We expect to - * recycle the breadcrumb slot between iterations and neither - * want to confuse ourselves or the GPU. - */ - - mutex_lock(&i915->drm.struct_mutex); - wakeref = intel_runtime_pm_get(&i915->runtime_pm); - - count = 0; - for_each_engine(engine, i915, id) { - IGT_TIMEOUT(end_time); - - if (!intel_engine_can_store_dword(engine)) - continue; - - do { - struct i915_timeline *tl; - struct i915_request *rq; - - tl = checked_i915_timeline_create(i915); - if (IS_ERR(tl)) { - err = PTR_ERR(tl); - goto out; - } - - rq = tl_write(tl, engine, count); - if (IS_ERR(rq)) { - i915_timeline_put(tl); - err = PTR_ERR(rq); - goto out; - } - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - pr_err("Wait for timeline writes timed out!\n"); - i915_timeline_put(tl); - err = -EIO; - goto out; - } - - if (*tl->hwsp_seqno != count) { - pr_err("Invalid seqno stored in timeline %lu, found 0x%x\n", - count, *tl->hwsp_seqno); - err = -EINVAL; - } - - i915_timeline_put(tl); - count++; - - if (err) - goto out; - - i915_timelines_park(i915); /* Encourage recycling! 
*/ - } while (!__igt_timeout(end_time, NULL)); - } - -out: - if (igt_flush_test(i915, I915_WAIT_LOCKED)) - err = -EIO; - intel_runtime_pm_put(&i915->runtime_pm, wakeref); - mutex_unlock(&i915->drm.struct_mutex); - - return err; -} - -int i915_timeline_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_hwsp_recycle), - SUBTEST(live_hwsp_engine), - SUBTEST(live_hwsp_alternate), - SUBTEST(live_hwsp_wrap), - }; - - if (i915_terminally_wedged(i915)) - return 0; - - return i915_subtests(tests, i915); -} diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 021ba42a3a00..2741805b56c2 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -68,7 +68,7 @@ static void mock_device_release(struct drm_device *dev) i915_gem_contexts_fini(i915); mutex_unlock(&i915->drm.struct_mutex); - i915_timelines_fini(i915); + intel_timelines_fini(i915); drain_workqueue(i915->wq); i915_gem_drain_freed_objects(i915); @@ -199,7 +199,7 @@ struct drm_i915_private *mock_gem_device(void) i915->gt.awake = true; - i915_timelines_init(i915); + intel_timelines_init(i915); mutex_lock(&i915->drm.struct_mutex); @@ -230,7 +230,7 @@ err_engine: mock_engine_free(i915->engine[RCS0]); err_unlock: mutex_unlock(&i915->drm.struct_mutex); - i915_timelines_fini(i915); + intel_timelines_fini(i915); destroy_workqueue(i915->wq); err_drv: drm_mode_config_cleanup(&i915->drm); diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c deleted file mode 100644 index c80ac0fbdd3b..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ /dev/null @@ -1,29 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2017-2018 Intel Corporation - */ - -#include "../i915_timeline.h" - -#include "mock_timeline.h" - -void mock_timeline_init(struct i915_timeline *timeline, u64 context) -{ - timeline->gt = NULL; - timeline->fence_context = context; - - mutex_init(&timeline->mutex); - - INIT_ACTIVE_REQUEST(&timeline->last_request); - INIT_LIST_HEAD(&timeline->requests); - - i915_syncmap_init(&timeline->sync); - - INIT_LIST_HEAD(&timeline->link); -} - -void mock_timeline_fini(struct i915_timeline *timeline) -{ - i915_syncmap_free(&timeline->sync); -} diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h deleted file mode 100644 index b6deaa61110d..000000000000 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * SPDX-License-Identifier: MIT - * - * Copyright © 2017-2018 Intel Corporation - */ - -#ifndef __MOCK_TIMELINE__ -#define __MOCK_TIMELINE__ - -struct i915_timeline; - -void mock_timeline_init(struct i915_timeline *timeline, u64 context); -void mock_timeline_fini(struct i915_timeline *timeline); - -#endif /* !__MOCK_TIMELINE__ */ -- cgit v1.2.3 From 2a98f4e65bba2db83489aaadd3f9db26547cbd35 Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Tue, 9 Jul 2019 17:42:27 +0100 Subject: drm/i915: add infrastructure to hold off preemption on a request We want to set this flag in the next commit on requests containing perf queries so that the result of the perf query can just be a delta of global counters, rather than doing post processing of the OA buffer. 
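In short, the infrastructure added here is a per-request flag that the submission backends translate into an effective priority nothing else can beat, so the flagged request sticks on the hardware until it completes. The sketch below condenses the diff that follows into one place; it is illustrative only and omits the surrounding backend code:

	#define I915_REQUEST_NOPREEMPT	BIT(1)	/* set on rq->flags by the request's creator */

	static inline bool i915_request_has_nopreempt(const struct i915_request *rq)
	{
		/* Preemption should only be disabled very rarely */
		return unlikely(rq->flags & I915_REQUEST_NOPREEMPT);
	}

	/*
	 * Both the execlists and GuC backends fold the flag into the priority
	 * compared against the queue, so nothing is ever considered important
	 * enough to usurp the active nopreempt request.
	 */
	static inline int effective_prio(const struct i915_request *rq)
	{
		int prio = rq_prio(rq);

		if (i915_request_has_nopreempt(rq))
			prio = I915_PRIORITY_UNPREEMPTABLE;	/* INT_MAX */

		return prio;
	}
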
Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson [ickle: add basic selftest for nopreempt] Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190709164227.25859-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_lrc.c | 11 +++ drivers/gpu/drm/i915/gt/selftest_lrc.c | 109 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_priolist_types.h | 10 +++ drivers/gpu/drm/i915/i915_request.c | 4 +- drivers/gpu/drm/i915/i915_request.h | 15 +++- drivers/gpu/drm/i915/intel_guc_submission.c | 13 +++- drivers/gpu/drm/i915/intel_pm.c | 5 +- 7 files changed, 161 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index dec735405195..19ce8eb5e5c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -258,6 +258,17 @@ static int effective_prio(const struct i915_request *rq) { int prio = rq_prio(rq); + /* + * If this request is special and must not be interrupted at any + * cost, so be it. Note we are only checking the most recent request + * in the context and so may be masking an earlier vip request. It + * is hoped that under the conditions where nopreempt is used, this + * will not matter (i.e. all requests to that context will be + * nopreempt for as long as desired). + */ + if (i915_request_has_nopreempt(rq)) + prio = I915_PRIORITY_UNPREEMPTABLE; + /* * On unwinding the active request, we give it a priority bump * if it has completed waiting on any semaphore. If we know that diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 672bdaa66540..b9b881ab8e7c 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -721,6 +721,114 @@ static void preempt_client_fini(struct preempt_client *c) kernel_context_close(c->ctx); } +static int live_nopreempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct preempt_client a, b; + enum intel_engine_id id; + intel_wakeref_t wakeref; + int err = -ENOMEM; + + /* + * Verify that we can disable preemption for an individual request + * that may be being observed and not want to be interrupted. + */ + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); + + if (preempt_client_init(i915, &a)) + goto err_unlock; + if (preempt_client_init(i915, &b)) + goto err_client_a; + b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + + for_each_engine(engine, i915, id) { + struct i915_request *rq_a, *rq_b; + + if (!intel_engine_has_preemption(engine)) + continue; + + engine->execlists.preempt_hang.count = 0; + + rq_a = igt_spinner_create_request(&a.spin, + a.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + /* Low priority client, but unpreemptable! */ + rq_a->flags |= I915_REQUEST_NOPREEMPT; + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + rq_b = igt_spinner_create_request(&b.spin, + b.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + + i915_request_add(rq_b); + + /* B is much more important than A! (But A is unpreemptable.) 
*/ + GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); + + /* Wait long enough for preemption and timeslicing */ + if (igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client started too early!\n"); + goto err_wedged; + } + + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + igt_spinner_end(&b.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d; should have been suppressed!\n", + engine->execlists.preempt_hang.count); + err = -EINVAL; + goto err_wedged; + } + + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); +err_unlock: + intel_runtime_pm_put(&i915->runtime_pm, wakeref); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_client_b; +} + static int live_suppress_self_preempt(void *arg) { struct drm_i915_private *i915 = arg; @@ -2028,6 +2136,7 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_busywait_preempt), SUBTEST(live_preempt), SUBTEST(live_late_preempt), + SUBTEST(live_nopreempt), SUBTEST(live_suppress_self_preempt), SUBTEST(live_suppress_wait_preempt), SUBTEST(live_chain_preempt), diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index 49709de69875..b02dea17dcab 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -17,6 +17,16 @@ enum { I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + /* + * Requests containing performance queries must not be preempted by + * another context. They get scheduled with their default priority and + * once they reach the execlist ports we ensure that they stick on the + * HW until finished by pretending that they have maximum priority, + * i.e. nothing can have higher priority and force us to usurp the + * active request. + */ + I915_PRIORITY_UNPREEMPTABLE = INT_MAX, + I915_PRIORITY_INVALID = INT_MIN }; diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 5ff87c4a0cd5..222c9c56e9de 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -292,7 +292,7 @@ static bool i915_request_retire(struct i915_request *rq) dma_fence_signal_locked(&rq->fence); if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) i915_request_cancel_breadcrumb(rq); - if (rq->waitboost) { + if (i915_request_has_waitboost(rq)) { GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); atomic_dec(&rq->i915->gt_pm.rps.num_waiters); } @@ -684,7 +684,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->file_priv = NULL; rq->batch = NULL; rq->capture_list = NULL; - rq->waitboost = false; + rq->flags = 0; rq->execution_mask = ALL_ENGINES; INIT_LIST_HEAD(&rq->active_list); diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index b58ceef92e20..313df3c37158 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -216,7 +216,9 @@ struct i915_request { /** Time at which this request was emitted, in jiffies. 
*/ unsigned long emitted_jiffies; - bool waitboost; + unsigned long flags; +#define I915_REQUEST_WAITBOOST BIT(0) +#define I915_REQUEST_NOPREEMPT BIT(1) /** timeline->request entry for this request */ struct list_head link; @@ -430,6 +432,17 @@ static inline void i915_request_mark_complete(struct i915_request *rq) rq->hwsp_seqno = (u32 *)&rq->fence.seqno; /* decouple from HWSP */ } +static inline bool i915_request_has_waitboost(const struct i915_request *rq) +{ + return rq->flags & I915_REQUEST_WAITBOOST; +} + +static inline bool i915_request_has_nopreempt(const struct i915_request *rq) +{ + /* Preemption should only be disabled very rarely */ + return unlikely(rq->flags & I915_REQUEST_NOPREEMPT); +} + bool i915_retire_requests(struct drm_i915_private *i915); #endif /* I915_REQUEST_H */ diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 12c22359fdac..f104b94c14ef 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -707,6 +707,16 @@ static inline int rq_prio(const struct i915_request *rq) return rq->sched.attr.priority | __NO_PREEMPTION; } +static inline int effective_prio(const struct i915_request *rq) +{ + int prio = rq_prio(rq); + + if (i915_request_has_nopreempt(rq)) + prio = I915_PRIORITY_UNPREEMPTABLE; + + return prio; +} + static struct i915_request *schedule_in(struct i915_request *rq, int idx) { trace_i915_request_in(rq, idx); @@ -747,7 +757,8 @@ static void __guc_dequeue(struct intel_engine_cs *engine) &engine->i915->guc.preempt_work[engine->id]; int prio = execlists->queue_priority_hint; - if (i915_scheduler_need_preempt(prio, rq_prio(last))) { + if (i915_scheduler_need_preempt(prio, + effective_prio(last))) { intel_write_status_page(engine, I915_GEM_HWS_PREEMPT, GUC_PREEMPT_INPROGRESS); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 87244d8215a7..0cecea228546 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6876,9 +6876,10 @@ void gen6_rps_boost(struct i915_request *rq) /* Serializes with i915_request_retire() */ boost = false; spin_lock_irqsave(&rq->lock, flags); - if (!rq->waitboost && !dma_fence_is_signaled_locked(&rq->fence)) { + if (!i915_request_has_waitboost(rq) && + !dma_fence_is_signaled_locked(&rq->fence)) { boost = !atomic_fetch_inc(&rps->num_waiters); - rq->waitboost = true; + rq->flags |= I915_REQUEST_WAITBOOST; } spin_unlock_irqrestore(&rq->lock, flags); if (!boost) -- cgit v1.2.3 From a79ca656b648e6851dd2f1eb0dcef86b278a8908 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 13 Aug 2019 20:07:05 +0100 Subject: drm/i915: Push the wakeref->count deferral to the backend If the backend wishes to defer the wakeref parking, make it responsible for unlocking the wakeref (i.e. bumping the counter). This allows it to time the unlock much more carefully in case it happens to needs the wakeref to be active during its deferral. For instance, during engine parking we may choose to emit an idle barrier (a request). To do so, we borrow the engine->kernel_context timeline and to ensure exclusive access we keep the engine->wakeref.count as 0. However, to submit that request to HW may require a intel_engine_pm_get() (e.g. to keep the submission tasklet alive) and before we allow that we have to rewake our wakeref to avoid a recursive deadlock. 
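
That park/unlock sequencing is easier to see in miniature. Below is a hedged, standalone C sketch of the deferral pattern only: the toy_* names, the C11 atomics and the printf reporting are inventions for illustration, standing in for intel_wakeref, __intel_wakeref_defer_park() and the ops->put() callback described above; this is not i915 code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_wakeref {
	atomic_int count;	/* 0 == parked, > 0 == active */
};

/* Rough analogue of __intel_wakeref_defer_park(): re-arm the count to 1
 * while it is still 0, i.e. from within the park callback itself. */
static void toy_defer_park(struct toy_wakeref *wf)
{
	atomic_store_explicit(&wf->count, 1, memory_order_release);
}

/* Backend park callback; returns true when the release was deferred. */
static bool toy_park(struct toy_wakeref *wf)
{
	/*
	 * Imagine we must emit an idle-barrier request here.  Submitting it
	 * needs an active wakeref, so we unlock the wakeref ourselves and
	 * report the deferral back to the core put path.
	 */
	toy_defer_park(wf);
	return true;
}

/* Core "put last reference" path: on deferral the callback has already
 * re-armed the count, so the final release is skipped. */
static void toy_put_last(struct toy_wakeref *wf)
{
	if (atomic_fetch_sub_explicit(&wf->count, 1, memory_order_acq_rel) != 1)
		return;	/* someone else still holds a reference */

	if (toy_park(wf))
		printf("park deferred, count re-armed to %d\n",
		       atomic_load(&wf->count));
	else
		printf("parked for real: drop runtime-pm and wake waiters\n");
}

int main(void)
{
	struct toy_wakeref wf = { .count = 1 };

	toy_put_last(&wf);	/* defers: wf.count is back to 1 afterwards */
	return 0;
}

The kernel warning quoted next is the sort of failure this reordering is meant to address.
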
<4> [257.742916] IRQs not enabled as expected <4> [257.742930] WARNING: CPU: 0 PID: 0 at kernel/softirq.c:169 __local_bh_enable_ip+0xa9/0x100 <4> [257.742936] Modules linked in: vgem snd_hda_codec_hdmi snd_hda_codec_realtek snd_hda_codec_generic i915 btusb btrtl btbcm btintel snd_hda_intel snd_intel_nhlt bluetooth snd_hda_codec coretemp snd_hwdep crct10dif_pclmul snd_hda_core crc32_pclmul ecdh_generic ecc ghash_clmulni_intel snd_pcm r8169 realtek lpc_ich prime_numbers i2c_hid <4> [257.742991] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G U W 5.3.0-rc3-g5d0a06cd532c-drmtip_340+ #1 <4> [257.742998] Hardware name: GIGABYTE GB-BXBT-1900/MZBAYAB-00, BIOS F6 02/17/2015 <4> [257.743008] RIP: 0010:__local_bh_enable_ip+0xa9/0x100 <4> [257.743017] Code: 37 5b 5d c3 8b 80 50 08 00 00 85 c0 75 a9 80 3d 0b be 25 01 00 75 a0 48 c7 c7 f3 0c 06 ac c6 05 fb bd 25 01 01 e8 77 84 ff ff <0f> 0b eb 89 48 89 ef e8 3b 41 06 00 eb 98 e8 e4 5c f4 ff 5b 5d c3 <4> [257.743025] RSP: 0018:ffffa78600003cb8 EFLAGS: 00010086 <4> [257.743035] RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000010302 <4> [257.743042] RDX: 0000000080010302 RSI: 0000000000000000 RDI: 00000000ffffffff <4> [257.743050] RBP: ffffffffc0494bb3 R08: 0000000000000000 R09: 0000000000000001 <4> [257.743058] R10: 0000000014c8f0e9 R11: 00000000fee2ff8e R12: ffffa23ba8c38008 <4> [257.743065] R13: ffffa23bacc579c0 R14: ffffa23bb7db0f60 R15: ffffa23b9cc8c430 <4> [257.743074] FS: 0000000000000000(0000) GS:ffffa23bbba00000(0000) knlGS:0000000000000000 <4> [257.743082] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 <4> [257.743089] CR2: 00007fe477b20778 CR3: 000000011f72a000 CR4: 00000000001006f0 <4> [257.743096] Call Trace: <4> [257.743104] <4> [257.743265] __i915_request_commit+0x240/0x5d0 [i915] <4> [257.743427] ? __i915_request_create+0x228/0x4c0 [i915] <4> [257.743584] __engine_park+0x64/0x250 [i915] <4> [257.743730] ____intel_wakeref_put_last+0x1c/0x70 [i915] <4> [257.743878] i915_sample+0x2ee/0x310 [i915] <4> [257.744030] ? i915_pmu_cpu_offline+0xb0/0xb0 [i915] <4> [257.744040] __hrtimer_run_queues+0x11e/0x4b0 <4> [257.744068] hrtimer_interrupt+0xea/0x250 <4> [257.744079] ? lockdep_hardirqs_off+0x79/0xd0 <4> [257.744101] smp_apic_timer_interrupt+0x96/0x280 <4> [257.744114] apic_timer_interrupt+0xf/0x20 <4> [257.744125] RIP: 0010:__do_softirq+0xb3/0x4ae v2: Keep the priority_hint assert v3: That assert was desperately trying to point out my bug. Sorry, little assert. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=111378 Signed-off-by: Chris Wilson Cc: Daniele Ceraolo Spurio Cc: Mika Kuoppala Reviewed-by: Mika Kuoppala Link: https://patchwork.freedesktop.org/patch/msgid/20190813190705.23869-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 8 +++- drivers/gpu/drm/i915/i915_priolist_types.h | 25 +++++------ drivers/gpu/drm/i915/i915_request.c | 66 ++++++++++++++++-------------- drivers/gpu/drm/i915/i915_request.h | 2 + drivers/gpu/drm/i915/i915_scheduler.c | 3 +- drivers/gpu/drm/i915/intel_wakeref.c | 4 +- drivers/gpu/drm/i915/intel_wakeref.h | 11 +++++ 7 files changed, 70 insertions(+), 49 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 6b15e3335dd6..49ad02c3720f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -68,10 +68,16 @@ static bool switch_to_kernel_context(struct intel_engine_cs *engine) /* Check again on the next retirement. 
*/ engine->wakeref_serial = engine->serial + 1; - i915_request_add_active_barriers(rq); + + /* Install ourselves as a preemption barrier */ + rq->sched.attr.priority = I915_PRIORITY_UNPREEMPTABLE; __i915_request_commit(rq); + /* Release our exclusive hold on the engine */ + __intel_wakeref_defer_park(&engine->wakeref); + __i915_request_queue(rq, NULL); + return false; } diff --git a/drivers/gpu/drm/i915/i915_priolist_types.h b/drivers/gpu/drm/i915/i915_priolist_types.h index b02dea17dcab..21037a2e2038 100644 --- a/drivers/gpu/drm/i915/i915_priolist_types.h +++ b/drivers/gpu/drm/i915/i915_priolist_types.h @@ -16,18 +16,6 @@ enum { I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - - /* - * Requests containing performance queries must not be preempted by - * another context. They get scheduled with their default priority and - * once they reach the execlist ports we ensure that they stick on the - * HW until finished by pretending that they have maximum priority, - * i.e. nothing can have higher priority and force us to usurp the - * active request. - */ - I915_PRIORITY_UNPREEMPTABLE = INT_MAX, - - I915_PRIORITY_INVALID = INT_MIN }; #define I915_USER_PRIORITY_SHIFT 2 @@ -39,6 +27,19 @@ enum { #define I915_PRIORITY_WAIT ((u8)BIT(0)) #define I915_PRIORITY_NOSEMAPHORE ((u8)BIT(1)) +/* Smallest priority value that cannot be bumped. */ +#define I915_PRIORITY_INVALID (INT_MIN | (u8)I915_PRIORITY_MASK) + +/* + * Requests containing performance queries must not be preempted by + * another context. They get scheduled with their default priority and + * once they reach the execlist ports we ensure that they stick on the + * HW until finished by pretending that they have maximum priority, + * i.e. nothing can have higher priority and force us to usurp the + * active request. + */ +#define I915_PRIORITY_UNPREEMPTABLE INT_MAX + #define __NO_PREEMPTION (I915_PRIORITY_WAIT) struct i915_priolist { diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 43175bada09e..4703aab3ae21 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1186,6 +1186,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) list_add(&ring->active_link, &rq->i915->gt.active_rings); rq->emitted_jiffies = jiffies; + return prev; +} + +void __i915_request_queue(struct i915_request *rq, + const struct i915_sched_attr *attr) +{ /* * Let the backend know a new request has arrived that may need * to adjust the existing execution schedule due to a high priority @@ -1199,43 +1205,15 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) */ local_bh_disable(); i915_sw_fence_commit(&rq->semaphore); - if (engine->schedule) { - struct i915_sched_attr attr = rq->gem_context->sched; - - /* - * Boost actual workloads past semaphores! - * - * With semaphores we spin on one engine waiting for another, - * simply to reduce the latency of starting our work when - * the signaler completes. However, if there is any other - * work that we could be doing on this engine instead, that - * is better utilisation and will reduce the overall duration - * of the current work. To avoid PI boosting a semaphore - * far in the distance past over useful work, we keep a history - * of any semaphore use along our dependency chain. 
- */ - if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) - attr.priority |= I915_PRIORITY_NOSEMAPHORE; - - /* - * Boost priorities to new clients (new request flows). - * - * Allow interactive/synchronous clients to jump ahead of - * the bulk clients. (FQ_CODEL) - */ - if (list_empty(&rq->sched.signalers_list)) - attr.priority |= I915_PRIORITY_WAIT; - - engine->schedule(rq, &attr); - } + if (attr && rq->engine->schedule) + rq->engine->schedule(rq, attr); i915_sw_fence_commit(&rq->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ - - return prev; } void i915_request_add(struct i915_request *rq) { + struct i915_sched_attr attr = rq->gem_context->sched; struct i915_request *prev; lockdep_assert_held(&rq->timeline->mutex); @@ -1245,6 +1223,32 @@ void i915_request_add(struct i915_request *rq) prev = __i915_request_commit(rq); + /* + * Boost actual workloads past semaphores! + * + * With semaphores we spin on one engine waiting for another, + * simply to reduce the latency of starting our work when + * the signaler completes. However, if there is any other + * work that we could be doing on this engine instead, that + * is better utilisation and will reduce the overall duration + * of the current work. To avoid PI boosting a semaphore + * far in the distance past over useful work, we keep a history + * of any semaphore use along our dependency chain. + */ + if (!(rq->sched.flags & I915_SCHED_HAS_SEMAPHORE_CHAIN)) + attr.priority |= I915_PRIORITY_NOSEMAPHORE; + + /* + * Boost priorities to new clients (new request flows). + * + * Allow interactive/synchronous clients to jump ahead of + * the bulk clients. (FQ_CODEL) + */ + if (list_empty(&rq->sched.signalers_list)) + attr.priority |= I915_PRIORITY_WAIT; + + __i915_request_queue(rq, &attr); + /* * In typical scenarios, we do not expect the previous request on * the timeline to be still tracked by timeline->last_request if it diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 313df3c37158..fec1d5f17c94 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -251,6 +251,8 @@ struct i915_request * __must_check i915_request_create(struct intel_context *ce); struct i915_request *__i915_request_commit(struct i915_request *request); +void __i915_request_queue(struct i915_request *rq, + const struct i915_sched_attr *attr); void i915_request_retire_upto(struct i915_request *rq); diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index 0bd452e851d8..7b84ebca2901 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -349,8 +349,7 @@ void i915_schedule_bump_priority(struct i915_request *rq, unsigned int bump) unsigned long flags; GEM_BUG_ON(bump & ~I915_PRIORITY_MASK); - - if (READ_ONCE(rq->sched.attr.priority) == I915_PRIORITY_INVALID) + if (READ_ONCE(rq->sched.attr.priority) & bump) return; spin_lock_irqsave(&schedule_lock, flags); diff --git a/drivers/gpu/drm/i915/intel_wakeref.c b/drivers/gpu/drm/i915/intel_wakeref.c index d4443e81c1c8..868cc78048d0 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.c +++ b/drivers/gpu/drm/i915/intel_wakeref.c @@ -57,12 +57,10 @@ static void ____intel_wakeref_put_last(struct intel_wakeref *wf) if (!atomic_dec_and_test(&wf->count)) goto unlock; + /* ops->put() must reschedule its own release on error/deferral */ if (likely(!wf->ops->put(wf))) { rpm_put(wf); wake_up_var(&wf->wakeref); - } else { - /* ops->put() must schedule its 
own release on deferral */ - atomic_set_release(&wf->count, 1); } unlock: diff --git a/drivers/gpu/drm/i915/intel_wakeref.h b/drivers/gpu/drm/i915/intel_wakeref.h index 535a3a12864b..5f0c972a80fb 100644 --- a/drivers/gpu/drm/i915/intel_wakeref.h +++ b/drivers/gpu/drm/i915/intel_wakeref.h @@ -163,6 +163,17 @@ intel_wakeref_is_active(const struct intel_wakeref *wf) return READ_ONCE(wf->wakeref); } +/** + * __intel_wakeref_defer_park: Defer the current park callback + * @wf: the wakeref + */ +static inline void +__intel_wakeref_defer_park(struct intel_wakeref *wf) +{ + INTEL_WAKEREF_BUG_ON(atomic_read(&wf->count)); + atomic_set_release(&wf->count, 1); +} + /** * intel_wakeref_wait_for_idle: Wait until the wakeref is idle * @wf: the wakeref -- cgit v1.2.3 From e5dadff4b09376e8ed92ecc0c12f1b9b3b1fbd19 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 15 Aug 2019 21:57:09 +0100 Subject: drm/i915: Protect request retirement with timeline->mutex Forgo the struct_mutex requirement for request retirement as we have been transitioning over to only using the timeline->mutex for controlling the lifetime of a request on that timeline. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20190815205709.24285-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 183 ++++++++++++++----------- drivers/gpu/drm/i915/gt/intel_context.h | 18 ++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 1 - drivers/gpu/drm/i915/gt/intel_engine_types.h | 3 - drivers/gpu/drm/i915/gt/intel_gt.c | 2 - drivers/gpu/drm/i915/gt/intel_gt_types.h | 2 - drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 19 ++- drivers/gpu/drm/i915/gt/mock_engine.c | 1 - drivers/gpu/drm/i915/gt/selftest_context.c | 9 +- drivers/gpu/drm/i915/i915_request.c | 156 ++++++++++----------- drivers/gpu/drm/i915/i915_request.h | 3 - 12 files changed, 209 insertions(+), 189 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 1bd2187ac8d6..77a201bb3422 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -735,63 +735,6 @@ static int eb_select_context(struct i915_execbuffer *eb) return 0; } -static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring) -{ - struct i915_request *rq; - - /* - * Completely unscientific finger-in-the-air estimates for suitable - * maximum user request size (to avoid blocking) and then backoff. - */ - if (intel_ring_update_space(ring) >= PAGE_SIZE) - return NULL; - - /* - * Find a request that after waiting upon, there will be at least half - * the ring available. The hysteresis allows us to compete for the - * shared ring and should mean that we sleep less often prior to - * claiming our resources, but not so long that the ring completely - * drains before we can submit our next request. 
- */ - list_for_each_entry(rq, &ring->request_list, ring_link) { - if (__intel_ring_space(rq->postfix, - ring->emit, ring->size) > ring->size / 2) - break; - } - if (&rq->ring_link == &ring->request_list) - return NULL; /* weird, we will check again later for real */ - - return i915_request_get(rq); -} - -static int eb_wait_for_ring(const struct i915_execbuffer *eb) -{ - struct i915_request *rq; - int ret = 0; - - /* - * Apply a light amount of backpressure to prevent excessive hogs - * from blocking waiting for space whilst holding struct_mutex and - * keeping all of their resources pinned. - */ - - rq = __eb_wait_for_ring(eb->context->ring); - if (rq) { - mutex_unlock(&eb->i915->drm.struct_mutex); - - if (i915_request_wait(rq, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT) < 0) - ret = -EINTR; - - i915_request_put(rq); - - mutex_lock(&eb->i915->drm.struct_mutex); - } - - return ret; -} - static int eb_lookup_vmas(struct i915_execbuffer *eb) { struct radix_tree_root *handles_vma = &eb->gem_context->handles_vma; @@ -2134,10 +2077,75 @@ static const enum intel_engine_id user_ring_map[] = { [I915_EXEC_VEBOX] = VECS0 }; -static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) +static struct i915_request *eb_throttle(struct intel_context *ce) +{ + struct intel_ring *ring = ce->ring; + struct intel_timeline *tl = ce->timeline; + struct i915_request *rq; + + /* + * Completely unscientific finger-in-the-air estimates for suitable + * maximum user request size (to avoid blocking) and then backoff. + */ + if (intel_ring_update_space(ring) >= PAGE_SIZE) + return NULL; + + /* + * Find a request that after waiting upon, there will be at least half + * the ring available. The hysteresis allows us to compete for the + * shared ring and should mean that we sleep less often prior to + * claiming our resources, but not so long that the ring completely + * drains before we can submit our next request. + */ + list_for_each_entry(rq, &tl->requests, link) { + if (rq->ring != ring) + continue; + + if (__intel_ring_space(rq->postfix, + ring->emit, ring->size) > ring->size / 2) + break; + } + if (&rq->link == &tl->requests) + return NULL; /* weird, we will check again later for real */ + + return i915_request_get(rq); +} + +static int +__eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) { int err; + if (likely(atomic_inc_not_zero(&ce->pin_count))) + return 0; + + err = mutex_lock_interruptible(&eb->i915->drm.struct_mutex); + if (err) + return err; + + err = __intel_context_do_pin(ce); + mutex_unlock(&eb->i915->drm.struct_mutex); + + return err; +} + +static void +__eb_unpin_context(struct i915_execbuffer *eb, struct intel_context *ce) +{ + if (likely(atomic_add_unless(&ce->pin_count, -1, 1))) + return; + + mutex_lock(&eb->i915->drm.struct_mutex); + intel_context_unpin(ce); + mutex_unlock(&eb->i915->drm.struct_mutex); +} + +static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce) +{ + struct intel_timeline *tl; + struct i915_request *rq; + int err; + /* * ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged. @@ -2151,7 +2159,7 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) * GGTT space, so do this first before we reserve a seqno for * ourselves. 
*/ - err = intel_context_pin(ce); + err = __eb_pin_context(eb, ce); if (err) return err; @@ -2163,23 +2171,43 @@ static int eb_pin_context(struct i915_execbuffer *eb, struct intel_context *ce) * until the timeline is idle, which in turn releases the wakeref * taken on the engine, and the parent device. */ - err = intel_context_timeline_lock(ce); - if (err) + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); goto err_unpin; + } intel_context_enter(ce); - intel_context_timeline_unlock(ce); + rq = eb_throttle(ce); + + intel_context_timeline_unlock(tl); + + if (rq) { + if (i915_request_wait(rq, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT) < 0) { + i915_request_put(rq); + err = -EINTR; + goto err_exit; + } + + i915_request_put(rq); + } eb->engine = ce->engine; eb->context = ce; return 0; +err_exit: + mutex_lock(&tl->mutex); + intel_context_exit(ce); + intel_context_timeline_unlock(tl); err_unpin: - intel_context_unpin(ce); + __eb_unpin_context(eb, ce); return err; } -static void eb_unpin_context(struct i915_execbuffer *eb) +static void eb_unpin_engine(struct i915_execbuffer *eb) { struct intel_context *ce = eb->context; struct intel_timeline *tl = ce->timeline; @@ -2188,7 +2216,7 @@ static void eb_unpin_context(struct i915_execbuffer *eb) intel_context_exit(ce); mutex_unlock(&tl->mutex); - intel_context_unpin(ce); + __eb_unpin_context(eb, ce); } static unsigned int @@ -2233,9 +2261,9 @@ eb_select_legacy_ring(struct i915_execbuffer *eb, } static int -eb_select_engine(struct i915_execbuffer *eb, - struct drm_file *file, - struct drm_i915_gem_execbuffer2 *args) +eb_pin_engine(struct i915_execbuffer *eb, + struct drm_file *file, + struct drm_i915_gem_execbuffer2 *args) { struct intel_context *ce; unsigned int idx; @@ -2250,7 +2278,7 @@ eb_select_engine(struct i915_execbuffer *eb, if (IS_ERR(ce)) return PTR_ERR(ce); - err = eb_pin_context(eb, ce); + err = __eb_pin_engine(eb, ce); intel_context_put(ce); return err; @@ -2468,16 +2496,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_destroy; - err = i915_mutex_lock_interruptible(dev); - if (err) - goto err_context; - - err = eb_select_engine(&eb, file, args); + err = eb_pin_engine(&eb, file, args); if (unlikely(err)) - goto err_unlock; + goto err_context; - err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ - if (unlikely(err)) + err = i915_mutex_lock_interruptible(dev); + if (err) goto err_engine; err = eb_relocate(&eb); @@ -2635,10 +2659,9 @@ err_batch_unpin: err_vma: if (eb.exec) eb_release_vmas(&eb); -err_engine: - eb_unpin_context(&eb); -err_unlock: mutex_unlock(&dev->struct_mutex); +err_engine: + eb_unpin_engine(&eb); err_context: i915_gem_context_put(eb.gem_context); err_destroy: diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 9fa8b588f18e..053a1307ecb4 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -12,6 +12,7 @@ #include "i915_active.h" #include "intel_context_types.h" #include "intel_engine_types.h" +#include "intel_timeline_types.h" void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, @@ -118,17 +119,24 @@ static inline void intel_context_put(struct intel_context *ce) kref_put(&ce->ref, ce->ops->destroy); } -static inline int __must_check +static inline struct intel_timeline *__must_check intel_context_timeline_lock(struct intel_context *ce) __acquires(&ce->timeline->mutex) { - return 
mutex_lock_interruptible(&ce->timeline->mutex); + struct intel_timeline *tl = ce->timeline; + int err; + + err = mutex_lock_interruptible(&tl->mutex); + if (err) + return ERR_PTR(err); + + return tl; } -static inline void intel_context_timeline_unlock(struct intel_context *ce) - __releases(&ce->timeline->mutex) +static inline void intel_context_timeline_unlock(struct intel_timeline *tl) + __releases(&tl->mutex) { - mutex_unlock(&ce->timeline->mutex); + mutex_unlock(&tl->mutex); } int intel_context_prepare_remote_request(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index d20750e420c0..957f27a2ec97 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -679,7 +679,6 @@ static int measure_breadcrumb_dw(struct intel_engine_cs *engine) engine->status_page.vma)) goto out_frame; - INIT_LIST_HEAD(&frame->ring.request_list); frame->ring.vaddr = frame->cs; frame->ring.size = sizeof(frame->cs); frame->ring.effective_size = frame->ring.size; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index a0f372807dd4..9965a32601d6 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -69,9 +69,6 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; - struct list_head request_list; - struct list_head active_link; - /* * As we have two types of rings, one global to the engine used * by ringbuffer submission and those that are exclusive to a diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 914bd2db3bc7..d48ec9a76ed1 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -15,8 +15,6 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) spin_lock_init(>->irq_lock); - INIT_LIST_HEAD(>->active_rings); - INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 143b2d78c2cc..f882e2cf159c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -48,8 +48,6 @@ struct intel_gt { struct list_head hwsp_free_list; } timelines; - struct list_head active_rings; - struct intel_wakeref wakeref; struct list_head closed_vma; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index b4c2662dbc75..e9863f4d826b 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1583,6 +1583,7 @@ static void execlists_context_unpin(struct intel_context *ce) { i915_gem_context_unpin_hw_id(ce->gem_context); i915_gem_object_unpin_map(ce->state->obj); + intel_ring_reset(ce->ring, ce->ring->tail); } static void diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index 409d764f8c6d..1d9c125b76d0 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -1250,7 +1250,7 @@ void intel_ring_unpin(struct intel_ring *ring) return; /* Discard any unused bytes beyond that submitted to hw. 
*/ - intel_ring_reset(ring, ring->tail); + intel_ring_reset(ring, ring->emit); i915_vma_unset_ggtt_write(vma); if (i915_vma_is_map_and_fenceable(vma)) @@ -1311,7 +1311,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) return ERR_PTR(-ENOMEM); kref_init(&ring->ref); - INIT_LIST_HEAD(&ring->request_list); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1865,7 +1864,10 @@ static int ring_request_alloc(struct i915_request *request) return 0; } -static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) +static noinline int +wait_for_space(struct intel_ring *ring, + struct intel_timeline *tl, + unsigned int bytes) { struct i915_request *target; long timeout; @@ -1873,15 +1875,18 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) if (intel_ring_update_space(ring) >= bytes) return 0; - GEM_BUG_ON(list_empty(&ring->request_list)); - list_for_each_entry(target, &ring->request_list, ring_link) { + GEM_BUG_ON(list_empty(&tl->requests)); + list_for_each_entry(target, &tl->requests, link) { + if (target->ring != ring) + continue; + /* Would completion of this request free enough space? */ if (bytes <= __intel_ring_space(target->postfix, ring->emit, ring->size)) break; } - if (WARN_ON(&target->ring_link == &ring->request_list)) + if (GEM_WARN_ON(&target->link == &tl->requests)) return -ENOSPC; timeout = i915_request_wait(target, @@ -1948,7 +1953,7 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) */ GEM_BUG_ON(!rq->reserved_space); - ret = wait_for_space(ring, total_bytes); + ret = wait_for_space(ring, rq->timeline, total_bytes); if (unlikely(ret)) return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 54a11dde3076..5d43cbc3f345 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -58,7 +58,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) ring->vaddr = (void *)(ring + 1); atomic_set(&ring->pin_count, 1); - INIT_LIST_HEAD(&ring->request_list); intel_ring_update_space(ring); return ring; diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index da9c49e2adaf..6fbc72bc290e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -20,10 +20,13 @@ static int request_sync(struct i915_request *rq) i915_request_add(rq); timeout = i915_request_wait(rq, 0, HZ / 10); - if (timeout < 0) + if (timeout < 0) { err = timeout; - else + } else { + mutex_lock(&rq->timeline->mutex); i915_request_retire_upto(rq); + mutex_unlock(&rq->timeline->mutex); + } i915_request_put(rq); @@ -35,6 +38,7 @@ static int context_sync(struct intel_context *ce) struct intel_timeline *tl = ce->timeline; int err = 0; + mutex_lock(&tl->mutex); do { struct i915_request *rq; long timeout; @@ -55,6 +59,7 @@ static int context_sync(struct intel_context *ce) i915_request_put(rq); } while (!err); + mutex_unlock(&tl->mutex); return err; } diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2b7645fd22d7..7cefff8b2c5e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -181,40 +181,6 @@ i915_request_remove_from_client(struct i915_request *request) spin_unlock(&file_priv->mm.lock); } -static void advance_ring(struct i915_request *request) -{ - struct intel_ring *ring = request->ring; - unsigned int tail; - - /* - 
* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); - if (list_is_last(&request->ring_link, &ring->request_list)) { - /* - * We may race here with execlists resubmitting this request - * as we retire it. The resubmission will move the ring->tail - * forwards (to request->wa_tail). We either read the - * current value that was written to hw, or the value that - * is just about to be. Either works, if we miss the last two - * noops - they are safe to be replayed on a reset. - */ - tail = READ_ONCE(request->tail); - list_del(&ring->active_link); - } else { - tail = request->postfix; - } - list_del_init(&request->ring_link); - - ring->head = tail; -} - static void free_capture_list(struct i915_request *request) { struct i915_capture_list *capture; @@ -232,7 +198,7 @@ static bool i915_request_retire(struct i915_request *rq) { struct i915_active_request *active, *next; - lockdep_assert_held(&rq->i915->drm.struct_mutex); + lockdep_assert_held(&rq->timeline->mutex); if (!i915_request_completed(rq)) return false; @@ -244,7 +210,17 @@ static bool i915_request_retire(struct i915_request *rq) GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); trace_i915_request_retire(rq); - advance_ring(rq); + /* + * We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + GEM_BUG_ON(!list_is_first(&rq->link, &rq->timeline->requests)); + rq->ring->head = rq->postfix; /* * Walk through the active list, calling retire on each. 
This allows @@ -321,7 +297,7 @@ static bool i915_request_retire(struct i915_request *rq) void i915_request_retire_upto(struct i915_request *rq) { - struct intel_ring *ring = rq->ring; + struct intel_timeline * const tl = rq->timeline; struct i915_request *tmp; GEM_TRACE("%s fence %llx:%lld, current %d\n", @@ -329,15 +305,11 @@ void i915_request_retire_upto(struct i915_request *rq) rq->fence.context, rq->fence.seqno, hwsp_seqno(rq)); - lockdep_assert_held(&rq->i915->drm.struct_mutex); + lockdep_assert_held(&tl->mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->ring_link)) - return; - do { - tmp = list_first_entry(&ring->request_list, - typeof(*tmp), ring_link); + tmp = list_first_entry(&tl->requests, typeof(*tmp), link); } while (i915_request_retire(tmp) && tmp != rq); } @@ -564,29 +536,28 @@ semaphore_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) return NOTIFY_DONE; } -static void ring_retire_requests(struct intel_ring *ring) +static void retire_requests(struct intel_timeline *tl) { struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &ring->request_list, ring_link) + list_for_each_entry_safe(rq, rn, &tl->requests, link) if (!i915_request_retire(rq)) break; } static noinline struct i915_request * -request_alloc_slow(struct intel_context *ce, gfp_t gfp) +request_alloc_slow(struct intel_timeline *tl, gfp_t gfp) { - struct intel_ring *ring = ce->ring; struct i915_request *rq; - if (list_empty(&ring->request_list)) + if (list_empty(&tl->requests)) goto out; if (!gfpflags_allow_blocking(gfp)) goto out; /* Move our oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + rq = list_first_entry(&tl->requests, typeof(*rq), link); i915_request_retire(rq); rq = kmem_cache_alloc(global.slab_requests, @@ -595,11 +566,11 @@ request_alloc_slow(struct intel_context *ce, gfp_t gfp) return rq; /* Ratelimit ourselves to prevent oom from malicious clients */ - rq = list_last_entry(&ring->request_list, typeof(*rq), ring_link); + rq = list_last_entry(&tl->requests, typeof(*rq), link); cond_synchronize_rcu(rq->rcustate); /* Retire our old requests in the hope that we free some */ - ring_retire_requests(ring); + retire_requests(tl); out: return kmem_cache_alloc(global.slab_requests, gfp); @@ -650,7 +621,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq = kmem_cache_alloc(global.slab_requests, gfp | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); if (unlikely(!rq)) { - rq = request_alloc_slow(ce, gfp); + rq = request_alloc_slow(tl, gfp); if (!rq) { ret = -ENOMEM; goto err_unreserve; @@ -742,15 +713,15 @@ struct i915_request * i915_request_create(struct intel_context *ce) { struct i915_request *rq; - int err; + struct intel_timeline *tl; - err = intel_context_timeline_lock(ce); - if (err) - return ERR_PTR(err); + tl = intel_context_timeline_lock(ce); + if (IS_ERR(tl)) + return ERR_CAST(tl); /* Move our oldest request to the slab-cache (if not in use!) 
*/ - rq = list_first_entry(&ce->ring->request_list, typeof(*rq), ring_link); - if (!list_is_last(&rq->ring_link, &ce->ring->request_list)) + rq = list_first_entry(&tl->requests, typeof(*rq), link); + if (!list_is_last(&rq->link, &tl->requests)) i915_request_retire(rq); intel_context_enter(ce); @@ -760,22 +731,22 @@ i915_request_create(struct intel_context *ce) goto err_unlock; /* Check that we do not interrupt ourselves with a new request */ - rq->cookie = lockdep_pin_lock(&ce->timeline->mutex); + rq->cookie = lockdep_pin_lock(&tl->mutex); return rq; err_unlock: - intel_context_timeline_unlock(ce); + intel_context_timeline_unlock(tl); return rq; } static int i915_request_await_start(struct i915_request *rq, struct i915_request *signal) { - if (list_is_first(&signal->ring_link, &signal->ring->request_list)) + if (list_is_first(&signal->link, &signal->timeline->requests)) return 0; - signal = list_prev_entry(signal, ring_link); + signal = list_prev_entry(signal, link); if (intel_timeline_sync_is_later(rq->timeline, &signal->fence)) return 0; @@ -1155,7 +1126,6 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) { struct intel_engine_cs *engine = rq->engine; struct intel_ring *ring = rq->ring; - struct i915_request *prev; u32 *cs; GEM_TRACE("%s fence %llx:%lld\n", @@ -1168,6 +1138,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) */ GEM_BUG_ON(rq->reserved_space > ring->space); rq->reserved_space = 0; + rq->emitted_jiffies = jiffies; /* * Record the position of the start of the breadcrumb so that @@ -1179,14 +1150,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) GEM_BUG_ON(IS_ERR(cs)); rq->postfix = intel_ring_offset(rq, cs); - prev = __i915_request_add_to_timeline(rq); - - list_add_tail(&rq->ring_link, &ring->request_list); - if (list_is_first(&rq->ring_link, &ring->request_list)) - list_add(&ring->active_link, &rq->i915->gt.active_rings); - rq->emitted_jiffies = jiffies; - - return prev; + return __i915_request_add_to_timeline(rq); } void __i915_request_queue(struct i915_request *rq, @@ -1212,10 +1176,11 @@ void __i915_request_queue(struct i915_request *rq, void i915_request_add(struct i915_request *rq) { struct i915_sched_attr attr = rq->gem_context->sched; + struct intel_timeline * const tl = rq->timeline; struct i915_request *prev; - lockdep_assert_held(&rq->timeline->mutex); - lockdep_unpin_lock(&rq->timeline->mutex, rq->cookie); + lockdep_assert_held(&tl->mutex); + lockdep_unpin_lock(&tl->mutex, rq->cookie); trace_i915_request_add(rq); @@ -1266,10 +1231,10 @@ void i915_request_add(struct i915_request *rq) * work on behalf of others -- but instead we should benefit from * improved resource management. (Well, that's the theory at least.) 
*/ - if (prev && i915_request_completed(prev)) + if (prev && i915_request_completed(prev) && prev->timeline == tl) i915_request_retire_upto(prev); - mutex_unlock(&rq->timeline->mutex); + mutex_unlock(&tl->mutex); } static unsigned long local_clock_us(unsigned int *cpu) @@ -1489,18 +1454,43 @@ out: bool i915_retire_requests(struct drm_i915_private *i915) { - struct intel_ring *ring, *tmp; + struct intel_gt_timelines *timelines = &i915->gt.timelines; + struct intel_timeline *tl, *tn; + LIST_HEAD(free); + + spin_lock(&timelines->lock); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + if (!mutex_trylock(&tl->mutex)) + continue; - lockdep_assert_held(&i915->drm.struct_mutex); + intel_timeline_get(tl); + GEM_BUG_ON(!tl->active_count); + tl->active_count++; /* pin the list element */ + spin_unlock(&timelines->lock); - list_for_each_entry_safe(ring, tmp, - &i915->gt.active_rings, active_link) { - intel_ring_get(ring); /* last rq holds reference! */ - ring_retire_requests(ring); - intel_ring_put(ring); + retire_requests(tl); + + spin_lock(&timelines->lock); + + /* Resume iteration after dropping lock */ + list_safe_reset_next(tl, tn, link); + if (!--tl->active_count) + list_del(&tl->link); + + mutex_unlock(&tl->mutex); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(tl->active_count); + list_add(&tl->link, &free); + } } + spin_unlock(&timelines->lock); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); - return !list_empty(&i915->gt.active_rings); + return !list_empty(&timelines->active_list); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index fec1d5f17c94..8ac6e1226a56 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -223,9 +223,6 @@ struct i915_request { /** timeline->request entry for this request */ struct list_head link; - /** ring->request_list entry for this request */ - struct list_head ring_link; - struct drm_i915_file_private *file_priv; /** file_priv list entry for this request */ struct list_head client_link; -- cgit v1.2.3
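
The retirement scheme this last patch moves under timeline->mutex can be sketched in isolation: requests sit on a per-timeline list in submission order, and retirement walks that list under the timeline's own lock, stopping at the first request that has not completed. The following is a hedged userspace sketch only; the toy_* types, the pthread mutex and the malloc-based list are inventions that merely mimic the shape of retire_requests() and the tl->requests walk shown in the diff above, not i915 code.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct toy_request {
	unsigned int seqno;
	bool completed;
	struct toy_request *next;	/* younger request on the same timeline */
};

struct toy_timeline {
	pthread_mutex_t mutex;		/* stand-in for intel_timeline.mutex */
	struct toy_request *requests;	/* oldest first, like tl->requests */
};

/* Walk the timeline in submission order and retire completed requests,
 * stopping at the first one still in flight. */
static void toy_retire_requests(struct toy_timeline *tl)
{
	pthread_mutex_lock(&tl->mutex);
	while (tl->requests && tl->requests->completed) {
		struct toy_request *rq = tl->requests;

		tl->requests = rq->next;
		printf("retired request %u\n", rq->seqno);
		free(rq);
	}
	pthread_mutex_unlock(&tl->mutex);
}

/* Append a new request to the timeline under its mutex. */
static struct toy_request *toy_add_request(struct toy_timeline *tl,
					   unsigned int seqno)
{
	struct toy_request *rq = calloc(1, sizeof(*rq));
	struct toy_request **slot;

	if (!rq) {
		perror("calloc");
		exit(1);
	}
	rq->seqno = seqno;

	pthread_mutex_lock(&tl->mutex);
	for (slot = &tl->requests; *slot; slot = &(*slot)->next)
		;
	*slot = rq;	/* keep submission order: oldest stays at the head */
	pthread_mutex_unlock(&tl->mutex);

	return rq;
}

int main(void)
{
	struct toy_timeline tl = { .mutex = PTHREAD_MUTEX_INITIALIZER };
	struct toy_request *a = toy_add_request(&tl, 1);
	struct toy_request *b = toy_add_request(&tl, 2);

	/* In the kernel the GPU marks completion via the HWS seqno;
	 * here we simply flip a flag from the single main thread. */
	a->completed = true;		/* only the oldest has finished */
	toy_retire_requests(&tl);	/* retires 1, stops at 2 */

	b->completed = true;
	toy_retire_requests(&tl);	/* now 2 can be retired as well */
	return 0;
}
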