diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d400d67..7d67774 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3144,7 +3144,6 @@ int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
 void i915_gem_context_reset(struct drm_device *dev);
 int i915_gem_context_open(struct drm_device *dev, struct drm_file *file);
-int i915_gem_context_enable(struct drm_i915_gem_request *req);
 void i915_gem_context_close(struct drm_device *dev, struct drm_file *file);
 int i915_switch_context(struct drm_i915_gem_request *req);
 struct intel_context *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f56af0a..bb6961e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3351,8 +3351,10 @@ int i915_gpu_idle(struct drm_device *dev)
 	struct intel_engine_cs *ring;
 	int ret, i;
 
-	/* Flush everything onto the inactive list. */
 	for_each_ring(ring, dev_priv, i) {
+		if (ring->last_context == NULL)
+			continue;
+
 		if (!i915.enable_execlists) {
 			struct drm_i915_gem_request *req;
 
@@ -4764,7 +4766,7 @@ i915_gem_init_hw(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	int ret, i, j;
+	int ret, i;
 
 	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
 		return -EIO;
@@ -4840,44 +4842,6 @@ i915_gem_init_hw(struct drm_device *dev)
 	 * on re-initialisation
 	 */
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
-	if (ret)
-		goto out;
-
-	/* Now it is safe to go back round and do everything else: */
-	for_each_ring(ring, dev_priv, i) {
-		struct drm_i915_gem_request *req;
-
-		WARN_ON(!ring->default_context);
-
-		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
-		if (ret) {
-			i915_gem_cleanup_ringbuffer(dev);
-			goto out;
-		}
-
-		if (ring->id == RCS) {
-			for (j = 0; j < NUM_L3_SLICES(dev); j++)
-				i915_gem_l3_remap(req, j);
-		}
-
-		ret = i915_ppgtt_init_ring(req);
-		if (ret && ret != -EIO) {
-			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
-			i915_gem_request_cancel(req);
-			i915_gem_cleanup_ringbuffer(dev);
-			goto out;
-		}
-
-		ret = i915_gem_context_enable(req);
-		if (ret && ret != -EIO) {
-			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
-			i915_gem_request_cancel(req);
-			i915_gem_cleanup_ringbuffer(dev);
-			goto out;
-		}
-
-		i915_add_request_no_flush(req);
-	}
 
 out:
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 0433d25..711e927 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -411,10 +411,10 @@ void i915_gem_context_fini(struct drm_device *dev)
 		intel_gpu_reset(dev);
 
 		/* When default context is created and switched to, base object refcount
-		 * will be 2 (+1 from object creation and +1 from do_switch()).
+		 * will be 2 (+1 from object creation and +1 from do_rcs_switch()).
 		 * i915_gem_context_fini() will be called after gpu_idle() has switched
 		 * to default context. So we need to unreference the base object once
-		 * to offset the do_switch part, so that i915_gem_context_unreference()
+		 * to offset the do_rcs_switch part, so that i915_gem_context_unreference()
 		 * can then free the base object correctly. */
 		WARN_ON(!dev_priv->ring[RCS].last_context);
 		if (dev_priv->ring[RCS].last_context == dctx) {
@@ -441,27 +441,6 @@ void i915_gem_context_fini(struct drm_device *dev)
 	i915_gem_context_unreference(dctx);
 }
 
-int i915_gem_context_enable(struct drm_i915_gem_request *req)
-{
-	struct intel_engine_cs *ring = req->ring;
-	int ret;
-
-	if (i915.enable_execlists) {
-		if (ring->init_context == NULL)
-			return 0;
-
-		ret = ring->init_context(req);
-	} else
-		ret = i915_switch_context(req);
-
-	if (ret) {
-		DRM_ERROR("ring init context: %d\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
 static int context_idr_cleanup(int id, void *p, void *data)
 {
 	struct intel_context *ctx = p;
@@ -595,50 +574,57 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
 	return ret;
 }
 
-static inline bool should_skip_switch(struct intel_engine_cs *ring,
-				      struct intel_context *from,
-				      struct intel_context *to)
+static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
+				   struct intel_engine_cs *ring,
+				   struct intel_context *to)
 {
 	if (to->remap_slice)
 		return false;
 
-	if (to->ppgtt && from == to &&
-	    !(intel_ring_flag(ring) & to->ppgtt->pd_dirty_rings))
-		return true;
+	if (!to->legacy_hw_ctx.initialized)
+		return false;
 
-	return false;
+	if (ppgtt && (intel_ring_flag(ring) & ppgtt->pd_dirty_rings))
+		return false;
+
+	return to == ring->last_context;
 }
 
 static bool
-needs_pd_load_pre(struct intel_engine_cs *ring, struct intel_context *to)
+needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
+		  struct intel_engine_cs *ring,
+		  struct intel_context *to)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	if (!ppgtt)
+		return false;
+
+	/* Always load the ppgtt on first use */
+	if (!ring->last_context)
+		return true;
 
-	if (!to->ppgtt)
+	/* Same context without new entries, skip */
+	if (ring->last_context == to &&
+	    !(intel_ring_flag(ring) & ppgtt->pd_dirty_rings))
 		return false;
 
-	if (INTEL_INFO(ring->dev)->gen < 8)
+	if (ring->id != RCS)
 		return true;
 
-	if (ring != &dev_priv->ring[RCS])
+	if (INTEL_INFO(ring->dev)->gen < 8)
 		return true;
 
 	return false;
 }
 
 static bool
-needs_pd_load_post(struct intel_engine_cs *ring, struct intel_context *to,
-		u32 hw_flags)
+needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
+		   struct intel_context *to,
+		   u32 hw_flags)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
-	if (!to->ppgtt)
-		return false;
-
-	if (!IS_GEN8(ring->dev))
+	if (!ppgtt)
 		return false;
 
-	if (ring != &dev_priv->ring[RCS])
+	if (!IS_GEN8(to->i915))
 		return false;
 
 	if (hw_flags & MI_RESTORE_INHIBIT)
@@ -647,59 +633,34 @@ needs_pd_load_post(struct intel_engine_cs *ring, struct intel_context *to,
 	return false;
 }
 
-static int do_switch(struct drm_i915_gem_request *req)
+static int do_rcs_switch(struct drm_i915_gem_request *req)
 {
 	struct intel_context *to = req->ctx;
 	struct intel_engine_cs *ring = req->ring;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	struct intel_context *from = ring->last_context;
-	u32 hw_flags = 0;
-	bool uninitialized = false;
+	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
+	struct intel_context *from;
+	u32 hw_flags;
 	int ret, i;
 
-	if (from != NULL && ring == &dev_priv->ring[RCS]) {
-		BUG_ON(from->legacy_hw_ctx.rcs_state == NULL);
-		BUG_ON(!i915_gem_obj_is_pinned(from->legacy_hw_ctx.rcs_state));
-	}
-
-	if (should_skip_switch(ring, from, to))
+	if (skip_rcs_switch(ppgtt, ring, to))
 		return 0;
 
 	/* Trying to pin first makes error handling easier. */
-	if (ring == &dev_priv->ring[RCS]) {
-		ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
-					    get_context_alignment(ring->dev), 0);
-		if (ret)
-			return ret;
-	}
+	ret = i915_gem_obj_ggtt_pin(to->legacy_hw_ctx.rcs_state,
+				    get_context_alignment(ring->dev),
+				    0);
+	if (ret)
+		return ret;
 
 	/*
 	 * Pin can switch back to the default context if we end up calling into
 	 * evict_everything - as a last ditch gtt defrag effort that also
 	 * switches to the default context. Hence we need to reload from here.
+	 *
+	 * XXX: Doing so is painfully broken!
 	 */
 	from = ring->last_context;
 
-	if (needs_pd_load_pre(ring, to)) {
-		/* Older GENs and non render rings still want the load first,
-		 * "PP_DCLV followed by PP_DIR_BASE register through Load
-		 * Register Immediate commands in Ring Buffer before submitting
-		 * a context."*/
-		trace_switch_mm(ring, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
-		if (ret)
-			goto unpin_out;
-
-		/* Doing a PD load always reloads the page dirs */
-		to->ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
-	}
-
-	if (ring != &dev_priv->ring[RCS]) {
-		if (from)
-			i915_gem_context_unreference(from);
-		goto done;
-	}
-
 	/*
 	 * Clear this page out of any CPU caches for coherent swap-in/out. Note
 	 * that thanks to write = false in this call and us not setting any gpu
@@ -712,53 +673,32 @@ static int do_switch(struct drm_i915_gem_request *req)
 	if (ret)
 		goto unpin_out;
 
-	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to)) {
-		hw_flags |= MI_RESTORE_INHIBIT;
-		/* NB: If we inhibit the restore, the context is not allowed to
-		 * die because future work may end up depending on valid address
-		 * space. This means we must enforce that a page table load
-		 * occur when this occurs. */
-	} else if (to->ppgtt &&
-		   (intel_ring_flag(ring) & to->ppgtt->pd_dirty_rings)) {
-		hw_flags |= MI_FORCE_RESTORE;
-		to->ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
-	}
-
-	/* We should never emit switch_mm more than once */
-	WARN_ON(needs_pd_load_pre(ring, to) &&
-		needs_pd_load_post(ring, to, hw_flags));
-
-	ret = mi_set_context(req, hw_flags);
-	if (ret)
-		goto unpin_out;
-
-	/* GEN8 does *not* require an explicit reload if the PDPs have been
-	 * setup, and we do not wish to move them.
-	 */
-	if (needs_pd_load_post(ring, to, hw_flags)) {
+	if (needs_pd_load_pre(ppgtt, ring, to)) {
+		/* Older GENs and non render rings still want the load first,
+		 * "PP_DCLV followed by PP_DIR_BASE register through Load
+		 * Register Immediate commands in Ring Buffer before submitting
+		 * a context."*/
 		trace_switch_mm(ring, to);
-		ret = to->ppgtt->switch_mm(to->ppgtt, req);
-		/* The hardware context switch is emitted, but we haven't
-		 * actually changed the state - so it's probably safe to bail
-		 * here. Still, let the user know something dangerous has
-		 * happened.
-		 */
-		if (ret) {
-			DRM_ERROR("Failed to change address space on context switch\n");
+		ret = ppgtt->switch_mm(ppgtt, req);
+		if (ret)
 			goto unpin_out;
-		}
 	}
 
-	for (i = 0; i < MAX_L3_SLICES; i++) {
-		if (!(to->remap_slice & (1<<i)))
-			continue;
-
-		ret = i915_gem_l3_remap(req, i);
-		/* If it failed, try again next round */
+	if (!to->legacy_hw_ctx.initialized || i915_gem_context_is_default(to))
+		/* NB: If we inhibit the restore, the context is not allowed to
+		 * die because future work may end up depending on valid address
+		 * space. This means we must enforce that a page table load
+		 * occur when this occurs. */
+		hw_flags = MI_RESTORE_INHIBIT;
+	else if (ppgtt && intel_ring_flag(ring) & ppgtt->pd_dirty_rings)
+		hw_flags = MI_FORCE_RESTORE;
+	else
+		hw_flags = 0;
+
+	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
+		ret = mi_set_context(req, hw_flags);
 		if (ret)
-			DRM_DEBUG_DRIVER("L3 remapping failed\n");
-		else
-			to->remap_slice &= ~(1<<i);
-	}
+			goto unpin_out;
+	}
 
 	/* The backing object for the context is done after switching to the
@@ -783,27 +723,51 @@ static int do_switch(struct drm_i915_gem_request *req)
 		i915_gem_object_ggtt_unpin(from->legacy_hw_ctx.rcs_state);
 		i915_gem_context_unreference(from);
 	}
-
-	uninitialized = !to->legacy_hw_ctx.initialized;
-	to->legacy_hw_ctx.initialized = true;
-
-done:
 	i915_gem_context_reference(to);
 	ring->last_context = to;
 
-	if (uninitialized) {
+	/* GEN8 does *not* require an explicit reload if the PDPs have been
+	 * setup, and we do not wish to move them.
+	 */
+	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
+		trace_switch_mm(ring, to);
+		ret = ppgtt->switch_mm(ppgtt, req);
+		/* The hardware context switch is emitted, but we haven't
+		 * actually changed the state - so it's probably safe to bail
+		 * here. Still, let the user know something dangerous has
+		 * happened.
+		 */
+		if (ret)
+			return ret;
+	}
+
+	if (ppgtt)
+		ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
+
+	for (i = 0; i < MAX_L3_SLICES; i++) {
+		if (!(to->remap_slice & (1<<i)))
+			continue;
+
+		ret = i915_gem_l3_remap(req, i);
+		if (ret)
+			return ret;
+
+		to->remap_slice &= ~(1<<i);
+	}
+
+	if (!to->legacy_hw_ctx.initialized) {
 		if (ring->init_context) {
 			ret = ring->init_context(req);
 			if (ret)
-				DRM_ERROR("ring init context: %d\n", ret);
+				return ret;
 		}
+		to->legacy_hw_ctx.initialized = true;
 	}
 
 	return 0;
 
 unpin_out:
-	if (ring->id == RCS)
-		i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
+	i915_gem_object_ggtt_unpin(to->legacy_hw_ctx.rcs_state);
 	return ret;
 }
 
@@ -823,22 +787,39 @@ unpin_out:
 int i915_switch_context(struct drm_i915_gem_request *req)
 {
 	struct intel_engine_cs *ring = req->ring;
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct drm_i915_private *dev_priv = req->i915;
 
 	WARN_ON(i915.enable_execlists);
 	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
 
-	if (req->ctx->legacy_hw_ctx.rcs_state == NULL) { /* We have the fake context */
-		if (req->ctx != ring->last_context) {
-			i915_gem_context_reference(req->ctx);
+	if (ring->id != RCS ||
+	    req->ctx->legacy_hw_ctx.rcs_state == NULL) {
+		struct intel_context *to = req->ctx;
+		struct i915_hw_ppgtt *ppgtt =
+			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
+
+		if (needs_pd_load_pre(ppgtt, ring, to)) {
+			int ret;
+
+			trace_switch_mm(ring, to);
+			ret = ppgtt->switch_mm(ppgtt, req);
+			if (ret)
+				return ret;
+
+			ppgtt->pd_dirty_rings &= ~intel_ring_flag(ring);
+		}
+
+		if (to != ring->last_context) {
+			i915_gem_context_reference(to);
 			if (ring->last_context)
 				i915_gem_context_unreference(ring->last_context);
-			ring->last_context = req->ctx;
+			ring->last_context = to;
 		}
+
 		return 0;
 	}
 
-	return do_switch(req);
+	return do_rcs_switch(req);
 }
 
 static bool contexts_enabled(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b37fe0d..96ada13 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2168,20 +2168,6 @@ int i915_ppgtt_init_hw(struct drm_device *dev)
 	return 0;
 }
 
-int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
-{
-	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
-	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-
-	if (i915.enable_execlists)
-		return 0;
-
-	if (!ppgtt)
-		return 0;
-
-	return ppgtt->switch_mm(ppgtt, req);
-}
-
 struct i915_hw_ppgtt *
 i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
 {
@@ -2747,7 +2733,6 @@ void i915_global_gtt_cleanup(struct drm_device *dev)
 		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
 
 		ppgtt->base.cleanup(&ppgtt->base);
-		kfree(ppgtt);
 	}
 
 	if (drm_mm_initialized(&vm->mm)) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index a216397..5003036 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -525,7 +525,6 @@ void i915_global_gtt_cleanup(struct drm_device *dev);
 
 int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
 int i915_ppgtt_init_hw(struct drm_device *dev);
-int i915_ppgtt_init_ring(struct drm_i915_gem_request *req);
 void i915_ppgtt_release(struct kref *kref);
 struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
 					struct drm_i915_file_private *fpriv);
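
For reference, the decision logic this patch introduces can be sketched outside the driver. The following is a minimal, compilable approximation of skip_rcs_switch(), needs_pd_load_pre() and the hw_flags selection now done in do_rcs_switch(); the cut-down types (struct ppgtt, struct ctx, struct engine), the ring_flag() helper, the pick_hw_flags() function and the flag constants are illustrative stand-ins, not the driver's real definitions, and the i915_gem_context_is_default() special case is omitted.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for the driver types used above. */
struct ppgtt  { uint32_t pd_dirty_rings; };
struct ctx    { bool initialized; unsigned remap_slice; struct ppgtt *ppgtt; };
struct engine { int id; int gen; struct ctx *last_context; };

enum { RCS = 0 };
enum { HW_FLAGS_NONE = 0, RESTORE_INHIBIT = 1, FORCE_RESTORE = 2 };

static uint32_t ring_flag(const struct engine *e) { return 1u << e->id; }

/* Mirrors skip_rcs_switch(): nothing to emit if the context is already live. */
static bool skip_rcs_switch(const struct ppgtt *p, const struct engine *e,
			    const struct ctx *to)
{
	if (to->remap_slice)
		return false;
	if (!to->initialized)
		return false;
	if (p && (ring_flag(e) & p->pd_dirty_rings))
		return false;
	return to == e->last_context;
}

/* Mirrors needs_pd_load_pre(): load PP_DIR_BASE before MI_SET_CONTEXT on
 * first use, on non-render rings, and on gen < 8. */
static bool needs_pd_load_pre(const struct ppgtt *p, const struct engine *e,
			      const struct ctx *to)
{
	if (!p)
		return false;
	if (!e->last_context)
		return true;
	if (e->last_context == to && !(ring_flag(e) & p->pd_dirty_rings))
		return false;
	if (e->id != RCS)
		return true;
	return e->gen < 8;
}

/* Mirrors the hw_flags selection in do_rcs_switch() (default-context check
 * left out for brevity). */
static int pick_hw_flags(const struct ppgtt *p, const struct engine *e,
			 const struct ctx *to)
{
	if (!to->initialized)
		return RESTORE_INHIBIT;
	if (p && (ring_flag(e) & p->pd_dirty_rings))
		return FORCE_RESTORE;
	return HW_FLAGS_NONE;
}

int main(void)
{
	/* First switch onto an uninitialised context with all rings dirty. */
	struct ppgtt vm = { .pd_dirty_rings = ~0u };
	struct ctx to = { .initialized = false, .remap_slice = 0, .ppgtt = &vm };
	struct engine rcs = { .id = RCS, .gen = 7, .last_context = NULL };

	printf("skip=%d pd_pre=%d hw_flags=%d\n",
	       skip_rcs_switch(&vm, &rcs, &to),
	       needs_pd_load_pre(&vm, &rcs, &to),
	       pick_hw_flags(&vm, &rcs, &to));
	return 0;
}

In the patched driver the same three questions are answered in this order on the render ring: bail out early if nothing changed, emit the PP_DIR_BASE load before MI_SET_CONTEXT where required, and only then choose MI_RESTORE_INHIBIT or MI_FORCE_RESTORE for the MI_SET_CONTEXT itself, which is skipped entirely when to == from and no force-restore is needed.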