From 4dd43927e2ebc3957fd48eaea0a4b202317a1c22 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 31 Dec 2013 12:38:26 +0000 Subject: [PATCH] drm/i915: Fix obj->map_and_fenceable for ppgtt With ppgtt, it is no longer correct to mark an object as map_and_fenceable if we simply unbind it from the global gtt. This has consequences during execbuffer where we simply use obj->map_and_fenceable in use_cpu_reloc() to decide which access method to use for writing into the object. Now for a ppgtt object, map_and_fenceable will be true when it is not bound into the ggtt but only in a ppgtt, leading to an invalid access on a non-llc architecture. Signed-off-by: Chris Wilson Cc: Ben Widawsky Cc: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem.c | 8 ++-- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 75 ++++++++++++++---------------- 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a1948c5..7130a2a4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3159,9 +3159,8 @@ int i915_vma_unbind(struct i915_vma *vma) vma->unbind_vma(vma); list_del_init(&vma->mm_list); - /* Avoid an unnecessary call to unbind on rebind. */ if (i915_is_ggtt(vma->vm)) - obj->map_and_fenceable = true; + obj->map_and_fenceable = false; drm_mm_remove_node(&vma->node); i915_gem_vma_destroy(vma); @@ -3513,6 +3512,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj) return 0; } } else if (enable) { + if (WARN_ON(!obj->map_and_fenceable)) + return -EINVAL; + reg = i915_find_fence_reg(dev); if (IS_ERR(reg)) return PTR_ERR(reg); @@ -4576,8 +4578,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->fence_reg = I915_FENCE_REG_NONE; obj->madv = I915_MADV_WILLNEED; - /* Avoid an unnecessary call to unbind on the first bind. */ - obj->map_and_fenceable = true; i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size); } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1ecb31d..0612a0a 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -35,6 +35,7 @@ #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) +#define __EXEC_OBJECT_NEEDS_MAP (1<<29) #define __EXEC_OBJECT_NEEDS_BIAS (1<<28) #define BATCH_OFFSET_BIAS (256*1024) @@ -532,14 +533,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb) } static int -need_reloc_mappable(struct i915_vma *vma) -{ - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - return entry->relocation_count && !use_cpu_reloc(vma->obj) && - i915_is_ggtt(vma->vm); -} - -static int i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, struct intel_engine_cs *ring, bool *need_reloc) @@ -552,14 +545,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, int ret; flags = 0; - - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - if (need_fence || need_reloc_mappable(vma)) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) flags |= PIN_MAPPABLE; - if (entry->flags & EXEC_OBJECT_NEEDS_GTT) flags |= PIN_GLOBAL; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) @@ -598,26 +585,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, } static bool -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access) +need_reloc_mappable(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct drm_i915_gem_object *obj = vma->obj; - bool need_fence, need_mappable; - need_fence = - has_fenced_gpu_access && - entry->flags & EXEC_OBJECT_NEEDS_FENCE && - obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(vma); + if (entry->relocation_count == 0) + return false; + + if (!i915_is_ggtt(vma->vm)) + return false; + + /* See also use_cpu_reloc() */ + if (HAS_LLC(vma->obj->base.dev)) + return false; + + if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; - WARN_ON((need_mappable || need_fence) && + return true; +} + +static bool +eb_vma_misplaced(struct i915_vma *vma) +{ + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; + struct drm_i915_gem_object *obj = vma->obj; + + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !i915_is_ggtt(vma->vm)); if (entry->alignment && vma->node.start & (entry->alignment - 1)) return true; - if (need_mappable && !obj->map_and_fenceable) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) return true; if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && @@ -658,9 +659,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, obj->tiling_mode != I915_TILING_NONE; need_mappable = need_fence || need_reloc_mappable(vma); - if (need_mappable) + if (need_mappable) { + entry->flags |= __EXEC_OBJECT_NEEDS_MAP; list_move(&vma->exec_list, &ordered_vmas); - else + } else list_move_tail(&vma->exec_list, &ordered_vmas); obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; @@ -690,7 +692,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, if (!drm_mm_node_allocated(&vma->node)) continue; - if (eb_vma_misplaced(vma, has_fenced_gpu_access)) + if (eb_vma_misplaced(vma)) ret = i915_vma_unbind(vma); else ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); @@ -1367,24 +1369,19 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. * hsw should have this fixed, but bdw mucks it up again. */ - if (flags & I915_DISPATCH_SECURE && - !batch_obj->has_global_gtt_mapping) { - /* When we have multiple VMs, we'll need to make sure that we - * allocate space first */ - struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); - BUG_ON(!vma); - vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); - } + if (flags & I915_DISPATCH_SECURE) { + ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); + if (ret) + goto err; - if (flags & I915_DISPATCH_SECURE) exec_start += i915_gem_obj_ggtt_offset(batch_obj); - else + } else exec_start += i915_gem_obj_offset(batch_obj, vm); ret = legacy_ringbuffer_submission(dev, file, ring, ctx, args, &eb->vmas, batch_obj, exec_start, flags); if (ret) - goto err; + goto err_batch; err: /* the request owns the ref now */ -- 1.9.1