From d6ccb25187f7eae1076ede8aeab420ef6f9b0eb9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Feb 2012 21:21:06 +0000 Subject: [PATCH] drm/i915: Record the tail at each request and use it to estimate the head This is an optimisation to prevent a performance regression incurred should we need to fall back to reading I915_RING_HEAD every time we run out of ring space. When emitting many batches in quick succession, such as in the middle of a RENDER/BLT switch storm, we may be emitting faster than the GPU is retiring and so the ring advances slowly and we have to read the registers (along with their forcewake dance) before every batch. By itself this patch has no significant impact, but it does prevent the significant performance regression associated with (drm/i915: Remove use of the autoreported ringbuffer HEAD position) Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 5 +++ drivers/gpu/drm/i915/i915_gem.c | 12 ++++--- drivers/gpu/drm/i915/intel_ringbuffer.c | 58 ++++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + 4 files changed, 68 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 4ee0793..c28451b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -945,6 +945,9 @@ struct drm_i915_gem_request { /** GEM sequence number associated with this request. */ uint32_t seqno; + /** Position in the ringbuffer of the end of the request */ + u32 tail; + /** Time at which this request was emitted, in jiffies.
*/ unsigned long emitted_jiffies; @@ -1234,6 +1237,8 @@ i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) } void i915_gem_retire_requests(struct drm_device *dev); +void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring); + void i915_gem_reset(struct drm_device *dev); void i915_gem_clflush_object(struct drm_i915_gem_object *obj); int __must_check i915_gem_object_set_domain(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 51390b0..29dcd39 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1785,19 +1785,20 @@ i915_add_request(struct intel_ring_buffer *ring, drm_i915_private_t *dev_priv = ring->dev->dev_private; uint32_t seqno; int was_empty; - int ret; + int tail; BUG_ON(request == NULL); seqno = i915_gem_next_request_seqno(ring); - ret = ring->add_request(ring, &seqno); - if (ret) - return ret; + tail = ring->add_request(ring, &seqno); + if (tail < 0) + return tail; trace_i915_gem_request_add(ring, seqno); request->seqno = seqno; request->ring = ring; + request->tail = tail; request->emitted_jiffies = jiffies; was_empty = list_empty(&ring->request_list); list_add_tail(&request->list, &ring->request_list); @@ -1934,7 +1935,7 @@ void i915_gem_reset(struct drm_device *dev) /** * This function clears the request list as sequence numbers are passed. 
*/ -static +void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) { uint32_t seqno; @@ -1962,6 +1963,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) break; trace_i915_gem_request_retire(ring, request->seqno); + ring->last_retired_head = request->tail; list_del(&request->list); i915_gem_request_remove_from_client(request); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 620b2ed..1b1a3b6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -458,7 +458,7 @@ gen6_add_request(struct intel_ring_buffer *ring, intel_ring_advance(ring); *result = seqno; - return 0; + return ring->tail; } int @@ -533,6 +533,7 @@ pc_render_add_request(struct intel_ring_buffer *ring, PIPE_CONTROL_FLUSH(ring, scratch_addr); scratch_addr += 128; PIPE_CONTROL_FLUSH(ring, scratch_addr); + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_WRITE_FLUSH | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | @@ -543,7 +544,7 @@ pc_render_add_request(struct intel_ring_buffer *ring, intel_ring_advance(ring); *result = seqno; - return 0; + return ring->tail; } static int @@ -564,7 +565,7 @@ render_ring_add_request(struct intel_ring_buffer *ring, intel_ring_advance(ring); *result = seqno; - return 0; + return ring->tail; } static u32 @@ -730,7 +731,7 @@ ring_add_request(struct intel_ring_buffer *ring, intel_ring_advance(ring); *result = seqno; - return 0; + return ring->tail; } static bool @@ -1054,11 +1055,60 @@ static int intel_wrap_ring_buffer(struct intel_ring_buffer *ring) return 0; } +static int intel_ring_wait_request(struct intel_ring_buffer *ring, int n) +{ + struct drm_i915_gem_request *request; + u32 seqno = 0; + int ret; + + if (ring->last_retired_head != -1) { + ring->head = ring->last_retired_head; + ring->last_retired_head = -1; + ring->space = ring_space(ring); + if (ring->space >= n) + return 0; + } + + list_for_each_entry(request, 
&ring->request_list, list) { + int space = request->tail - (ring->tail + 8); + if (space < 0) + space += ring->size; + if (request->tail != -1 && space >= n) { + seqno = request->seqno; + break; + } + request->tail = -1; /* consumed; -1 == no valid ring position */ + } + + if (seqno == 0) + return -ENOSPC; + + ret = i915_wait_request(ring, seqno); + if (ret) + return ret; + + if (WARN_ON(ring->last_retired_head == -1)) + return -ENOSPC; + + ring->head = ring->last_retired_head; + ring->last_retired_head = -1; + ring->space = ring_space(ring); + if (WARN_ON(ring->space < n)) + return -ENOSPC; + + return 0; +} + int intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n) { struct drm_device *dev = ring->dev; struct drm_i915_private *dev_priv = dev->dev_private; unsigned long end; + int ret; + + ret = intel_ring_wait_request(ring, n); + if (ret != -ENOSPC) + return ret; trace_i915_ring_wait_begin(ring); if (drm_core_check_feature(dev, DRIVER_GEM)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 4c5338c..63b1b9e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -46,6 +46,8 @@ struct intel_ring_buffer { int effective_size; struct intel_hw_status_page status_page; + u32 last_retired_head; + spinlock_t irq_lock; u32 irq_refcount; u32 irq_mask; -- 1.7.8.3