From 31e38d047c7b3fbfffe5eb66050baa0e4500a64b Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Thu, 2 Jun 2011 19:39:05 +0100
Subject: [PATCH] drm/i915: Separate fence pin counting from normal bind pin
 counting

In order to correctly account for reserving space in the GTT and fences
for a batch buffer, we need to independently track whether the fence is
pinned due to a fenced GPU access in the batch, as opposed to whether
the buffer is pinned in the aperture. Currently we count the fence as
pinned if the buffer has already been seen in the execbuffer. This
leads to a false accounting of available fence registers, causing
frequent mass evictions. Worse, if coupled with the change to make
i915_gem_object_get_fence() report EDEADLK upon fence starvation, the
batchbuffer can fail with only one fence required...

Signed-off-by: Chris Wilson
Cc: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_drv.h            |   22 +++++
 drivers/gpu/drm/i915/i915_gem.c            |    7 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  139 ++++++++++++++++++----------
 drivers/gpu/drm/i915/intel_display.c       |   21 ++++-
 4 files changed, 135 insertions(+), 54 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 130a601..6056c15 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -784,6 +784,9 @@ struct drm_i915_gem_object {
 	unsigned int pin_count : 4;
 #define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 
+	unsigned int fence_pin_count : 3;
+#define DRM_I915_GEM_OBJECT_MAX_FENCE_PIN_COUNT 0x7
+
 	/**
 	 * Is the object at the current location in the gtt mappable and
 	 * fenceable? Used to avoid costly recalculations.
@@ -1184,7 +1187,26 @@ int __must_check
 i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 			  struct intel_ring_buffer *pipelined);
 int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj);
+static inline void
+i915_gem_object_pin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		BUG_ON(obj->fence_pin_count == DRM_I915_GEM_OBJECT_MAX_FENCE_PIN_COUNT);
+		obj->fence_pin_count++;
+	}
+}
+
+static inline void
+i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj)
+{
+	if (obj->fence_reg != I915_FENCE_REG_NONE) {
+		BUG_ON(obj->fence_pin_count == 0);
+		obj->fence_pin_count--;
+	}
+}
+
 void i915_gem_retire_requests(struct drm_device *dev);
+
 void i915_gem_reset(struct drm_device *dev);
 void i915_gem_clflush_object(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e37d69f..2e2b15e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1977,6 +1977,7 @@ static void i915_gem_reset_fences(struct drm_device *dev)
 			i915_gem_release_mmap(obj);
 
 		reg->obj->fence_reg = I915_FENCE_REG_NONE;
+		reg->obj->fence_pin_count = 0;
 		reg->obj->fenced_gpu_access = false;
 		reg->obj->last_fenced_seqno = 0;
 		i915_gem_clear_fence_reg(dev, reg);
@@ -2638,6 +2639,8 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
 {
 	int ret;
 
+	BUG_ON(obj->fence_pin_count);
+
 	if (obj->tiling_mode)
 		i915_gem_release_mmap(obj);
 
@@ -2671,7 +2674,7 @@ i915_find_fence_reg(struct drm_device *dev,
 		if (!reg->obj)
 			return reg;
 
-		if (!reg->obj->pin_count)
+		if (!reg->obj->fence_pin_count)
 			avail = reg;
 	}
 
@@ -2681,7 +2684,7 @@ i915_find_fence_reg(struct drm_device *dev,
 	/* None available, try to steal one or wait for a user to finish */
 	avail = first = NULL;
 	list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-		if (reg->obj->pin_count)
+		if (reg->obj->fence_pin_count)
 			continue;
 
 		if (first == NULL)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0010aee..ad1d56f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -463,6 +463,54 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
 	return ret;
 }
 
+#define __EXEC_OBJECT_HAS_FENCE (1<<31)
+
+static int
+pin_and_fence_object(struct drm_i915_gem_object *obj,
+		     struct intel_ring_buffer *ring)
+{
+	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
+	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+	bool need_fence, need_mappable;
+	int ret;
+
+	need_fence =
+		has_fenced_gpu_access &&
+		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+		obj->tiling_mode != I915_TILING_NONE;
+	need_mappable =
+		entry->relocation_count ? true : need_fence;
+
+	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+	if (ret)
+		return ret;
+
+	if (has_fenced_gpu_access) {
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			if (obj->tiling_mode) {
+				ret = i915_gem_object_get_fence(obj, ring);
+				if (ret)
+					goto err_unpin;
+
+				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+				i915_gem_object_pin_fence(obj);
+			} else {
+				ret = i915_gem_object_put_fence(obj);
+				if (ret)
+					goto err_unpin;
+			}
+		}
+		obj->pending_fenced_gpu_access = need_fence;
+	}
+
+	entry->offset = obj->gtt_offset;
+	return 0;
+
+err_unpin:
+	i915_gem_object_unpin(obj);
+	return ret;
+}
+
 static int
 i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 			    struct drm_file *file,
@@ -520,6 +568,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	list_for_each_entry(obj, objects, exec_list) {
 		struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
 		bool need_fence, need_mappable;
+
 		if (!obj->gtt_space)
 			continue;
 
@@ -534,58 +583,47 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 		    (need_mappable && !obj->map_and_fenceable))
 			ret = i915_gem_object_unbind(obj);
 		else
-			ret = i915_gem_object_pin(obj,
-						  entry->alignment,
-						  need_mappable);
+			ret = pin_and_fence_object(obj, ring);
 		if (ret)
 			goto err;
-
-		entry++;
 	}
 
 	/* Bind fresh objects */
 	list_for_each_entry(obj, objects, exec_list) {
-		struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
-		bool need_fence;
-
-		need_fence =
-			has_fenced_gpu_access &&
-			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-			obj->tiling_mode != I915_TILING_NONE;
-
-		if (!obj->gtt_space) {
-			bool need_mappable =
-				entry->relocation_count ? true : need_fence;
-
-			ret = i915_gem_object_pin(obj,
-						  entry->alignment,
-						  need_mappable);
-			if (ret)
-				break;
-		}
+		if (obj->gtt_space)
+			continue;
 
-		if (has_fenced_gpu_access) {
-			if (need_fence) {
-				ret = i915_gem_object_get_fence(obj, ring);
-				if (ret)
-					break;
-			} else if (entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-				   obj->tiling_mode == I915_TILING_NONE) {
-				/* XXX pipelined! */
-				ret = i915_gem_object_put_fence(obj);
-				if (ret)
-					break;
-			}
-			obj->pending_fenced_gpu_access = need_fence;
+		ret = pin_and_fence_object(obj, ring);
+		if (ret) {
+			int ret_ignore;
+
+			/* This can potentially raise a harmless
+			 * -EINVAL if we failed to bind in the above
+			 * call. It cannot raise -EINTR since we know
+			 * that the bo is freshly bound and so will
+			 * not need to be flushed or waited upon.
+			 */
+			ret_ignore = i915_gem_object_unbind(obj);
+			(void)ret_ignore;
+			WARN_ON(obj->gtt_space);
+			break;
 		}
-
-		entry->offset = obj->gtt_offset;
 	}
 
 	/* Decrement pin count for bound objects */
 	list_for_each_entry(obj, objects, exec_list) {
-		if (obj->gtt_space)
-			i915_gem_object_unpin(obj);
+		struct drm_i915_gem_exec_object2 *entry;
+
+		if (!obj->gtt_space)
+			continue;
+
+		entry = obj->exec_entry;
+		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+			i915_gem_object_unpin_fence(obj);
+			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+		}
+
+		i915_gem_object_unpin(obj);
 	}
 
 	if (ret != -ENOSPC || retry > 1)
@@ -602,16 +640,19 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
 	} while (1);
 
 err:
-	obj = list_entry(obj->exec_list.prev,
-			 struct drm_i915_gem_object,
-			 exec_list);
-	while (objects != &obj->exec_list) {
-		if (obj->gtt_space)
-			i915_gem_object_unpin(obj);
+	list_for_each_entry_continue_reverse(obj, objects, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry;
+
+		if (!obj->gtt_space)
+			continue;
+
+		entry = obj->exec_entry;
+		if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+			i915_gem_object_unpin_fence(obj);
+			entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
+		}
 
-		obj = list_entry(obj->exec_list.prev,
-				 struct drm_i915_gem_object,
-				 exec_list);
+		i915_gem_object_unpin(obj);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 62f9e52..23e7b11 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1820,6 +1820,8 @@ intel_pin_and_fence_fb_obj(struct drm_device *dev,
 		ret = i915_gem_object_get_fence(obj, pipelined);
 		if (ret)
 			goto err_unpin;
+
+		i915_gem_object_pin_fence(obj);
 	}
 
 	dev_priv->mm.interruptible = true;
@@ -1969,14 +1971,22 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	ret = intel_pipe_set_base_atomic(crtc, crtc->fb, x, y,
 					 LEAVE_ATOMIC_MODE_SET);
 	if (ret) {
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 		return ret;
 	}
 
 	if (old_fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(old_fb)->obj;
+
 		intel_wait_for_vblank(dev, intel_crtc->pipe);
-		i915_gem_object_unpin(to_intel_framebuffer(old_fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 	}
 
 	mutex_unlock(&dev->struct_mutex);
@@ -2848,8 +2858,12 @@ static void intel_crtc_disable(struct drm_crtc *crtc)
 	crtc_funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
 
 	if (crtc->fb) {
+		struct drm_i915_gem_object *obj =
+			to_intel_framebuffer(crtc->fb)->obj;
+
 		mutex_lock(&dev->struct_mutex);
-		i915_gem_object_unpin(to_intel_framebuffer(crtc->fb)->obj);
+		i915_gem_object_unpin_fence(obj);
+		i915_gem_object_unpin(obj);
 		mutex_unlock(&dev->struct_mutex);
 	}
 }
@@ -5935,6 +5949,7 @@ static void intel_unpin_work_fn(struct work_struct *__work)
 		container_of(__work, struct intel_unpin_work, work);
 
 	mutex_lock(&work->dev->struct_mutex);
+	i915_gem_object_unpin_fence(work->old_fb_obj);
 	i915_gem_object_unpin(work->old_fb_obj);
 	drm_gem_object_unreference(&work->pending_flip_obj->base);
 	drm_gem_object_unreference(&work->old_fb_obj->base);
-- 
1.7.5.4
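The calling convention introduced by the patch is that a bind pin
(i915_gem_object_pin) may now be accompanied by a separate fence pin
(i915_gem_object_pin_fence), and only the fence pin count is consulted by
i915_find_fence_reg() when deciding whether a fence register can be stolen.
Below is a minimal sketch of a caller following that convention; the helper
names intel_example_pin_fb()/intel_example_unpin_fb() and the 4096-byte
alignment are illustrative only and are not part of this patch.

/* Hypothetical caller inside the i915 driver (assumes i915_drv.h is in
 * scope): take a bind pin, acquire a fence, then mark the fence as pinned
 * so i915_find_fence_reg() will not steal it while the buffer is in use.
 */
static int intel_example_pin_fb(struct drm_i915_gem_object *obj,
				struct intel_ring_buffer *pipelined)
{
	int ret;

	ret = i915_gem_object_pin(obj, 4096, true);	/* bind pin */
	if (ret)
		return ret;

	ret = i915_gem_object_get_fence(obj, pipelined);
	if (ret)
		goto err_unpin;

	i915_gem_object_pin_fence(obj);			/* fence pin */
	return 0;

err_unpin:
	i915_gem_object_unpin(obj);
	return ret;
}

/* Teardown drops the fence pin together with the bind pin; unpin_fence is
 * a no-op if the object never held a fence register.
 */
static void intel_example_unpin_fb(struct drm_i915_gem_object *obj)
{
	i915_gem_object_unpin_fence(obj);
	i915_gem_object_unpin(obj);
}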