From 8e9cceb623a7cc7d929a8aeeb0765234d249e07f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 12 Jul 2012 12:18:40 +0100 Subject: [PATCH] drm/i915: Super flushing list removal --- drivers/gpu/drm/i915/i915_debugfs.c | 16 +- drivers/gpu/drm/i915/i915_drv.h | 42 +--- drivers/gpu/drm/i915/i915_gem.c | 290 +++++++--------------------- drivers/gpu/drm/i915/i915_gem_evict.c | 20 -- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 277 ++------------------------ drivers/gpu/drm/i915/i915_irq.c | 5 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 40 +++- drivers/gpu/drm/i915/intel_ringbuffer.h | 11 +- 8 files changed, 147 insertions(+), 554 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 359f6e8..1312b79 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -44,7 +44,6 @@ enum { ACTIVE_LIST, - FLUSHING_LIST, INACTIVE_LIST, PINNED_LIST, }; @@ -121,14 +120,15 @@ static const char *cache_level_str(int type) static void describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) { - seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d%s%s%s", + seq_printf(m, "%p: %s%s %8zdKiB %04x %04x %d %d %d%s%s%s", &obj->base, get_pin_flag(obj), get_tiling_flag(obj), obj->base.size / 1024, obj->base.read_domains, obj->base.write_domain, - obj->last_rendering_seqno, + obj->last_read_seqno, + obj->last_write_seqno, obj->last_fenced_seqno, cache_level_str(obj->cache_level), obj->dirty ? " dirty" : "", @@ -177,10 +177,6 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data) seq_printf(m, "Inactive:\n"); head = &dev_priv->mm.inactive_list; break; - case FLUSHING_LIST: - seq_printf(m, "Flushing:\n"); - head = &dev_priv->mm.flushing_list; - break; default: mutex_unlock(&dev->struct_mutex); return -EINVAL; @@ -238,7 +234,6 @@ static int i915_gem_object_info(struct seq_file *m, void* data) size = count = mappable_size = mappable_count = 0; count_objects(&dev_priv->mm.active_list, mm_list); - count_objects(&dev_priv->mm.flushing_list, mm_list); seq_printf(m, " %u [%u] active objects, %zu [%zu] bytes\n", count, mappable_count, size, mappable_size); @@ -630,12 +625,12 @@ static void print_error_buffers(struct seq_file *m, seq_printf(m, "%s [%d]:\n", name, count); while (count--) { - seq_printf(m, " %08x %8u %04x %04x %08x%s%s%s%s%s%s%s", + seq_printf(m, " %08x %8u %04x %04x %x %x%s%s%s%s%s%s%s", err->gtt_offset, err->size, err->read_domains, err->write_domain, - err->seqno, + err->rseqno, err->wseqno, pin_flag(err->pinned), tiling_flag(err->tiling), dirty_flag(err->dirty), @@ -2006,7 +2001,6 @@ static struct drm_info_list i915_debugfs_list[] = { {"i915_gem_gtt", i915_gem_gtt_info, 0}, {"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST}, {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, - {"i915_gem_flushing", i915_gem_object_list_info, 0, (void *) FLUSHING_LIST}, {"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST}, {"i915_gem_pageflip", i915_gem_pageflip_info, 0}, {"i915_gem_request", i915_gem_request_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 627fe35..7ae333b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -221,7 +221,7 @@ struct drm_i915_error_state { struct drm_i915_error_buffer { u32 size; u32 name; - u32 seqno; + u32 rseqno, wseqno; u32 gtt_offset; u32 read_domains; u32 write_domain; @@ -696,17 +696,6 @@ typedef struct drm_i915_private { struct list_head 
active_list;

 		/**
-		 * List of objects which are not in the ringbuffer but which
-		 * still have a write_domain which needs to be flushed before
-		 * unbinding.
-		 *
-		 * last_rendering_seqno is 0 while an object is in this list.
-		 *
-		 * A reference is held on the buffer while on this list.
-		 */
-		struct list_head flushing_list;
-
-		/**
 		 * LRU list of objects which are not in the ringbuffer and
 		 * are ready to unbind, but are still in the GTT.
 		 *
@@ -873,18 +862,16 @@ struct drm_i915_gem_object {
 	struct drm_mm_node *gtt_space;
 	struct list_head gtt_list;
 
-	/** This object's place on the active/flushing/inactive lists */
+	/** This object's place on the active/inactive lists */
 	struct list_head ring_list;
 	struct list_head mm_list;
-	/** This object's place on GPU write list */
-	struct list_head gpu_write_list;
 	/** This object's place in the batchbuffer or on the eviction list */
 	struct list_head exec_list;
 
 	/**
-	 * This is set if the object is on the active or flushing lists
-	 * (has pending rendering), and is not set if it's on inactive (ready
-	 * to be unbound).
+	 * This is set if the object is on the active lists (has pending
+	 * rendering and so a non-zero seqno), and is not set if it is on
+	 * the inactive (ready to be unbound) list.
 	 */
 	unsigned int active:1;
 
@@ -895,12 +882,6 @@ struct drm_i915_gem_object {
 	unsigned int dirty:1;
 
 	/**
-	 * This is set if the object has been written to since the last
-	 * GPU flush.
-	 */
-	unsigned int pending_gpu_write:1;
-
-	/**
 	 * Fence register bits (if any) for this object. Will be set
 	 * as needed when mapped into the GTT.
 	 * Protected by dev->struct_mutex.
@@ -992,7 +973,8 @@ struct drm_i915_gem_object {
 	struct intel_ring_buffer *ring;
 
 	/** Breadcrumb of last rendering to the buffer. */
-	uint32_t last_rendering_seqno;
+	uint32_t last_read_seqno;
+	uint32_t last_write_seqno;
 
 	/** Breadcrumb of last fenced GPU access to the buffer.
*/ uint32_t last_fenced_seqno; @@ -1274,9 +1256,6 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void i915_gem_load(struct drm_device *dev); int i915_gem_init_object(struct drm_gem_object *obj); -int __must_check i915_gem_flush_ring(struct intel_ring_buffer *ring, - uint32_t invalidate_domains, - uint32_t flush_domains); struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, size_t size); void i915_gem_free_object(struct drm_gem_object *obj); @@ -1291,7 +1270,6 @@ void i915_gem_lastclose(struct drm_device *dev); int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj, gfp_t gfpmask); int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); -int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj); int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct intel_ring_buffer *to); void i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, @@ -1358,9 +1336,9 @@ void i915_gem_init_ppgtt(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); int __must_check i915_gem_idle(struct drm_device *dev); -int __must_check i915_add_request(struct intel_ring_buffer *ring, - struct drm_file *file, - struct drm_i915_gem_request *request); +int i915_add_request(struct intel_ring_buffer *ring, + struct drm_file *file, + struct drm_i915_gem_request *request); int __must_check i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 53c3946..c4cedb6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -37,7 +37,6 @@ #include #include -static __must_check int i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); static __must_check int i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, @@ -1428,7 +1427,9 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = dev->dev_private; + BUG_ON(obj->base.write_domain & ~I915_GEM_GPU_DOMAINS); BUG_ON(ring == NULL); + obj->ring = ring; /* Add a reference if we're newly entering the active list. 
*/ @@ -1441,7 +1442,14 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, list_move_tail(&obj->mm_list, &dev_priv->mm.active_list); list_move_tail(&obj->ring_list, &ring->active_list); - obj->last_rendering_seqno = seqno; + obj->last_read_seqno = seqno; + + if (obj->base.write_domain) { + obj->dirty = 1; + obj->last_write_seqno = seqno; + if (obj->pin_count) /* check for potential scanout */ + intel_mark_busy(dev, obj); + } if (obj->fenced_gpu_access) { obj->last_fenced_seqno = seqno; @@ -1458,26 +1466,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj, } static void -i915_gem_object_move_off_active(struct drm_i915_gem_object *obj) -{ - list_del_init(&obj->ring_list); - obj->last_rendering_seqno = 0; - obj->last_fenced_seqno = 0; -} - -static void -i915_gem_object_move_to_flushing(struct drm_i915_gem_object *obj) -{ - struct drm_device *dev = obj->base.dev; - drm_i915_private_t *dev_priv = dev->dev_private; - - BUG_ON(!obj->active); - list_move_tail(&obj->mm_list, &dev_priv->mm.flushing_list); - - i915_gem_object_move_off_active(obj); -} - -static void i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) { struct drm_device *dev = obj->base.dev; @@ -1485,15 +1473,19 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj) list_move_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - BUG_ON(!list_empty(&obj->gpu_write_list)); BUG_ON(!obj->active); + + list_del_init(&obj->ring_list); obj->ring = NULL; - i915_gem_object_move_off_active(obj); + obj->last_read_seqno = 0; + obj->last_write_seqno = 0; + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; + + obj->last_fenced_seqno = 0; obj->fenced_gpu_access = false; obj->active = 0; - obj->pending_gpu_write = false; drm_gem_object_unreference(&obj->base); WARN_ON(i915_verify_lists(dev)); @@ -1525,30 +1517,6 @@ i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj) return obj->madv == I915_MADV_DONTNEED; } -static void -i915_gem_process_flushing_list(struct intel_ring_buffer *ring, - uint32_t flush_domains) -{ - struct drm_i915_gem_object *obj, *next; - - list_for_each_entry_safe(obj, next, - &ring->gpu_write_list, - gpu_write_list) { - if (obj->base.write_domain & flush_domains) { - uint32_t old_write_domain = obj->base.write_domain; - - obj->base.write_domain = 0; - list_del_init(&obj->gpu_write_list); - i915_gem_object_move_to_active(obj, ring, - i915_gem_next_request_seqno(ring)); - - trace_i915_gem_object_change_domain(obj, - obj->base.read_domains, - old_write_domain); - } - } -} - static u32 i915_gem_get_seqno(struct drm_device *dev) { @@ -1589,15 +1557,16 @@ i915_add_request(struct intel_ring_buffer *ring, * is that the flush _must_ happen before the next request, no matter * what. 
*/ - if (ring->gpu_caches_dirty) { - ret = i915_gem_flush_ring(ring, 0, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; + ret = intel_ring_flush_all_caches(ring); + if (ret) + return ret; - ring->gpu_caches_dirty = false; + if (request == NULL) { + request = kmalloc(sizeof(*request), GFP_KERNEL); + if (request == NULL) + return -ENOMEM; } - BUG_ON(request == NULL); seqno = i915_gem_next_request_seqno(ring); /* Record the position of the start of the request so that @@ -1608,8 +1577,10 @@ i915_add_request(struct intel_ring_buffer *ring, request_ring_position = intel_ring_get_tail(ring); ret = ring->add_request(ring, &seqno); - if (ret) - return ret; + if (ret) { + kfree(request); + return ret; + } trace_i915_gem_request_add(ring, seqno); @@ -1643,8 +1614,6 @@ i915_add_request(struct intel_ring_buffer *ring, &dev_priv->mm.retire_work, HZ); } - WARN_ON(!list_empty(&ring->gpu_write_list)); - return 0; } @@ -1686,8 +1655,6 @@ static void i915_gem_reset_ring_lists(struct drm_i915_private *dev_priv, struct drm_i915_gem_object, ring_list); - obj->base.write_domain = 0; - list_del_init(&obj->gpu_write_list); i915_gem_object_move_to_inactive(obj); } } @@ -1723,20 +1690,6 @@ void i915_gem_reset(struct drm_device *dev) for_each_ring(ring, dev_priv, i) i915_gem_reset_ring_lists(dev_priv, ring); - /* Remove anything from the flushing lists. The GPU cache is likely - * to be lost on reset along with the data, so simply move the - * lost bo to the inactive list. - */ - while (!list_empty(&dev_priv->mm.flushing_list)) { - obj = list_first_entry(&dev_priv->mm.flushing_list, - struct drm_i915_gem_object, - mm_list); - - obj->base.write_domain = 0; - list_del_init(&obj->gpu_write_list); - i915_gem_object_move_to_inactive(obj); - } - /* Move everything out of the GPU domains to ensure we do any * necessary invalidation upon reuse. 
*/ @@ -1804,13 +1757,10 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring) struct drm_i915_gem_object, ring_list); - if (!i915_seqno_passed(seqno, obj->last_rendering_seqno)) + if (!i915_seqno_passed(seqno, obj->last_read_seqno)) break; - if (obj->base.write_domain != 0) - i915_gem_object_move_to_flushing(obj); - else - i915_gem_object_move_to_inactive(obj); + i915_gem_object_move_to_inactive(obj); } if (unlikely(ring->trace_irq_seqno && @@ -1859,14 +1809,8 @@ i915_gem_retire_work_handler(struct work_struct *work) */ idle = true; for_each_ring(ring, dev_priv, i) { - if (ring->gpu_caches_dirty) { - struct drm_i915_gem_request *request; - - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL || - i915_add_request(ring, NULL, request)) - kfree(request); - } + if (ring->gpu_caches_dirty) + i915_add_request(ring, NULL, NULL); idle &= list_empty(&ring->request_list); } @@ -1913,25 +1857,13 @@ i915_gem_check_wedge(struct drm_i915_private *dev_priv, static int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) { - int ret = 0; + int ret; BUG_ON(!mutex_is_locked(&ring->dev->struct_mutex)); - if (seqno == ring->outstanding_lazy_request) { - struct drm_i915_gem_request *request; - - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL) - return -ENOMEM; - - ret = i915_add_request(ring, NULL, request); - if (ret) { - kfree(request); - return ret; - } - - BUG_ON(seqno != request->seqno); - } + ret = 0; + if (seqno == ring->outstanding_lazy_request) + ret = i915_add_request(ring, NULL, NULL); return ret; } @@ -2045,26 +1977,37 @@ i915_wait_seqno(struct intel_ring_buffer *ring, uint32_t seqno) * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. */ -int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj) +static __must_check int +i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, + bool readonly) { + u32 seqno; int ret; - /* This function only exists to support waiting for existing rendering, - * not for emitting required flushes. - */ - BUG_ON((obj->base.write_domain & I915_GEM_GPU_DOMAINS) != 0); - /* If there is rendering queued on the buffer being evicted, wait for * it. */ - if (obj->active) { - ret = i915_wait_seqno(obj->ring, obj->last_rendering_seqno); - if (ret) - return ret; - i915_gem_retire_requests_ring(obj->ring); + if (readonly) + seqno = obj->last_write_seqno; + else + seqno = obj->last_read_seqno; + if (seqno == 0) + return 0; + + ret = i915_wait_seqno(obj->ring, seqno); + if (ret) + return ret; + + /* Manually manage the write flush as we may have not yet retired + * the buffer. 
+ */ + if (obj->last_write_seqno && + i915_seqno_passed(seqno, obj->last_write_seqno)) { + obj->last_write_seqno = 0; + obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } + i915_gem_retire_requests_ring(obj->ring); return 0; } @@ -2079,14 +2022,10 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj) int ret; if (obj->active) { - ret = i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_check_olr(obj->ring, obj->last_read_seqno); if (ret) return ret; - ret = i915_gem_check_olr(obj->ring, - obj->last_rendering_seqno); - if (ret) - return ret; i915_gem_retire_requests_ring(obj->ring); } @@ -2146,7 +2085,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) goto out; if (obj->active) { - seqno = obj->last_rendering_seqno; + seqno = obj->last_read_seqno; ring = obj->ring; } @@ -2201,11 +2140,11 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, return 0; if (to == NULL || !i915_semaphore_is_enabled(obj->base.dev)) - return i915_gem_object_wait_rendering(obj); + return i915_gem_object_wait_rendering(obj, false); idx = intel_ring_sync_index(from, to); - seqno = obj->last_rendering_seqno; + seqno = obj->last_read_seqno; if (seqno <= from->sync_seqno[idx]) return 0; @@ -2317,42 +2256,11 @@ i915_gem_object_unbind(struct drm_i915_gem_object *obj) return ret; } -int -i915_gem_flush_ring(struct intel_ring_buffer *ring, - uint32_t invalidate_domains, - uint32_t flush_domains) -{ - int ret; - - if (((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) == 0) - return 0; - - trace_i915_gem_ring_flush(ring, invalidate_domains, flush_domains); - - ret = ring->flush(ring, invalidate_domains, flush_domains); - if (ret) - return ret; - - if (flush_domains & I915_GEM_GPU_DOMAINS) - i915_gem_process_flushing_list(ring, flush_domains); - - return 0; -} - static int i915_ring_idle(struct intel_ring_buffer *ring) { - int ret; - - if (list_empty(&ring->gpu_write_list) && list_empty(&ring->active_list)) + if (list_empty(&ring->active_list)) return 0; - if (!list_empty(&ring->gpu_write_list)) { - ret = i915_gem_flush_ring(ring, - I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS); - if (ret) - return ret; - } - return i915_wait_seqno(ring, i915_gem_next_request_seqno(ring)); } @@ -2368,10 +2276,6 @@ int i915_gpu_idle(struct drm_device *dev) if (ret) return ret; - /* Is the device fubar? */ - if (WARN_ON(!list_empty(&ring->gpu_write_list))) - return -EBUSY; - ret = i915_switch_context(ring, NULL, DEFAULT_CONTEXT_ID); if (ret) return ret; @@ -2547,21 +2451,8 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, static int i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) { - int ret; - - if (obj->fenced_gpu_access) { - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, - 0, obj->base.write_domain); - if (ret) - return ret; - } - - obj->fenced_gpu_access = false; - } - if (obj->last_fenced_seqno) { - ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); + int ret = i915_wait_seqno(obj->ring, obj->last_fenced_seqno); if (ret) return ret; @@ -2574,6 +2465,7 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj) if (obj->base.read_domains & I915_GEM_DOMAIN_GTT) mb(); + obj->fenced_gpu_access = false; return 0; } @@ -2868,17 +2760,6 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj) drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE); } -/** Flushes any GPU write domain for the object if it's dirty. 
*/ -static int -i915_gem_object_flush_gpu_write_domain(struct drm_i915_gem_object *obj) -{ - if ((obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0) - return 0; - - /* Queue the GPU write cache flushing we need. */ - return i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); -} - /** Flushes the GTT write domain for the object if it's dirty. */ static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) @@ -2945,16 +2826,10 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (obj->base.write_domain == I915_GEM_DOMAIN_GTT) return 0; - ret = i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; - if (obj->pending_gpu_write || write) { - ret = i915_gem_object_wait_rendering(obj); - if (ret) - return ret; - } - i915_gem_object_flush_cpu_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -3061,10 +2936,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 old_read_domains, old_write_domain; int ret; - ret = i915_gem_object_flush_gpu_write_domain(obj); - if (ret) - return ret; - if (pipelined != obj->ring) { ret = i915_gem_object_sync(obj, pipelined); if (ret) @@ -3118,13 +2989,7 @@ i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj) if ((obj->base.read_domains & I915_GEM_GPU_DOMAINS) == 0) return 0; - if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) { - ret = i915_gem_flush_ring(obj->ring, 0, obj->base.write_domain); - if (ret) - return ret; - } - - ret = i915_gem_object_wait_rendering(obj); + ret = i915_gem_object_wait_rendering(obj, false); if (ret) return ret; @@ -3148,16 +3013,10 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) return 0; - ret = i915_gem_object_flush_gpu_write_domain(obj); + ret = i915_gem_object_wait_rendering(obj, !write); if (ret) return ret; - if (write || obj->pending_gpu_write) { - ret = i915_gem_object_wait_rendering(obj); - if (ret) - return ret; - } - i915_gem_object_flush_gtt_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -3516,7 +3375,6 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, INIT_LIST_HEAD(&obj->gtt_list); INIT_LIST_HEAD(&obj->ring_list); INIT_LIST_HEAD(&obj->exec_list); - INIT_LIST_HEAD(&obj->gpu_write_list); obj->madv = I915_MADV_WILLNEED; /* Avoid an unnecessary call to unbind on the first bind. 
*/ obj->map_and_fenceable = true; @@ -3890,7 +3748,6 @@ i915_gem_entervt_ioctl(struct drm_device *dev, void *data, } BUG_ON(!list_empty(&dev_priv->mm.active_list)); - BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); BUG_ON(!list_empty(&dev_priv->mm.inactive_list)); mutex_unlock(&dev->struct_mutex); @@ -3938,7 +3795,6 @@ init_ring_lists(struct intel_ring_buffer *ring) { INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); - INIT_LIST_HEAD(&ring->gpu_write_list); } void @@ -3948,7 +3804,6 @@ i915_gem_load(struct drm_device *dev) drm_i915_private_t *dev_priv = dev->dev_private; INIT_LIST_HEAD(&dev_priv->mm.active_list); - INIT_LIST_HEAD(&dev_priv->mm.flushing_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); INIT_LIST_HEAD(&dev_priv->mm.gtt_list); @@ -4199,12 +4054,7 @@ static int i915_gpu_is_active(struct drm_device *dev) { drm_i915_private_t *dev_priv = dev->dev_private; - int lists_empty; - - lists_empty = list_empty(&dev_priv->mm.flushing_list) && - list_empty(&dev_priv->mm.active_list); - - return !lists_empty; + return !list_empty(&dev_priv->mm.active_list); } static int diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index ae7c24e..f61af59 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -92,23 +92,6 @@ i915_gem_evict_something(struct drm_device *dev, int min_size, /* Now merge in the soon-to-be-expired objects... */ list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { - /* Does the object require an outstanding flush? */ - if (obj->base.write_domain) - continue; - - if (mark_free(obj, &unwind_list)) - goto found; - } - - /* Finally add anything with a pending flush (in order of retirement) */ - list_for_each_entry(obj, &dev_priv->mm.flushing_list, mm_list) { - if (mark_free(obj, &unwind_list)) - goto found; - } - list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { - if (!obj->base.write_domain) - continue; - if (mark_free(obj, &unwind_list)) goto found; } @@ -171,7 +154,6 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only) int ret; lists_empty = (list_empty(&dev_priv->mm.inactive_list) && - list_empty(&dev_priv->mm.flushing_list) && list_empty(&dev_priv->mm.active_list)); if (lists_empty) return -ENOSPC; @@ -188,8 +170,6 @@ i915_gem_evict_everything(struct drm_device *dev, bool purgeable_only) i915_gem_retire_requests(dev); - BUG_ON(!list_empty(&dev_priv->mm.flushing_list)); - /* Having flushed everything, unbind() should never raise an error */ list_for_each_entry_safe(obj, next, &dev_priv->mm.inactive_list, mm_list) { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 88e2e11..44f5b22 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,180 +34,6 @@ #include "intel_drv.h" #include -struct change_domains { - uint32_t invalidate_domains; - uint32_t flush_domains; - uint32_t flush_rings; - uint32_t flips; -}; - -/* - * Set the next domain for the specified object. This - * may not actually perform the necessary flushing/invaliding though, - * as that may want to be batched with other set_domain operations - * - * This is (we hope) the only really tricky part of gem. The goal - * is fairly simple -- track which caches hold bits of the object - * and make sure they remain coherent. A few concrete examples may - * help to explain how it works. 
For shorthand, we use the notation - * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the - * a pair of read and write domain masks. - * - * Case 1: the batch buffer - * - * 1. Allocated - * 2. Written by CPU - * 3. Mapped to GTT - * 4. Read by GPU - * 5. Unmapped from GTT - * 6. Freed - * - * Let's take these a step at a time - * - * 1. Allocated - * Pages allocated from the kernel may still have - * cache contents, so we set them to (CPU, CPU) always. - * 2. Written by CPU (using pwrite) - * The pwrite function calls set_domain (CPU, CPU) and - * this function does nothing (as nothing changes) - * 3. Mapped by GTT - * This function asserts that the object is not - * currently in any GPU-based read or write domains - * 4. Read by GPU - * i915_gem_execbuffer calls set_domain (COMMAND, 0). - * As write_domain is zero, this function adds in the - * current read domains (CPU+COMMAND, 0). - * flush_domains is set to CPU. - * invalidate_domains is set to COMMAND - * clflush is run to get data out of the CPU caches - * then i915_dev_set_domain calls i915_gem_flush to - * emit an MI_FLUSH and drm_agp_chipset_flush - * 5. Unmapped from GTT - * i915_gem_object_unbind calls set_domain (CPU, CPU) - * flush_domains and invalidate_domains end up both zero - * so no flushing/invalidating happens - * 6. Freed - * yay, done - * - * Case 2: The shared render buffer - * - * 1. Allocated - * 2. Mapped to GTT - * 3. Read/written by GPU - * 4. set_domain to (CPU,CPU) - * 5. Read/written by CPU - * 6. Read/written by GPU - * - * 1. Allocated - * Same as last example, (CPU, CPU) - * 2. Mapped to GTT - * Nothing changes (assertions find that it is not in the GPU) - * 3. Read/written by GPU - * execbuffer calls set_domain (RENDER, RENDER) - * flush_domains gets CPU - * invalidate_domains gets GPU - * clflush (obj) - * MI_FLUSH and drm_agp_chipset_flush - * 4. set_domain (CPU, CPU) - * flush_domains gets GPU - * invalidate_domains gets CPU - * wait_rendering (obj) to make sure all drawing is complete. - * This will include an MI_FLUSH to get the data from GPU - * to memory - * clflush (obj) to invalidate the CPU cache - * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?) - * 5. Read/written by CPU - * cache lines are loaded and dirtied - * 6. Read written by GPU - * Same as last GPU access - * - * Case 3: The constant buffer - * - * 1. Allocated - * 2. Written by CPU - * 3. Read by GPU - * 4. Updated (written) by CPU again - * 5. Read by GPU - * - * 1. Allocated - * (CPU, CPU) - * 2. Written by CPU - * (CPU, CPU) - * 3. Read by GPU - * (CPU+RENDER, 0) - * flush_domains = CPU - * invalidate_domains = RENDER - * clflush (obj) - * MI_FLUSH - * drm_agp_chipset_flush - * 4. Updated (written) by CPU again - * (CPU, CPU) - * flush_domains = 0 (no previous write domain) - * invalidate_domains = 0 (no new read domains) - * 5. Read by GPU - * (CPU+RENDER, 0) - * flush_domains = CPU - * invalidate_domains = RENDER - * clflush (obj) - * MI_FLUSH - * drm_agp_chipset_flush - */ -static void -i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj, - struct intel_ring_buffer *ring, - struct change_domains *cd) -{ - uint32_t invalidate_domains = 0, flush_domains = 0; - - /* - * If the object isn't moving to a new write domain, - * let the object stay in multiple read domains - */ - if (obj->base.pending_write_domain == 0) - obj->base.pending_read_domains |= obj->base.read_domains; - - /* - * Flush the current write domain if - * the new read domains don't match. 
Invalidate - * any read domains which differ from the old - * write domain - */ - if (obj->base.write_domain && - (((obj->base.write_domain != obj->base.pending_read_domains || - obj->ring != ring)) || - (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) { - flush_domains |= obj->base.write_domain; - invalidate_domains |= - obj->base.pending_read_domains & ~obj->base.write_domain; - } - /* - * Invalidate any read caches which may have - * stale data. That is, any new read domains. - */ - invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains; - if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) - i915_gem_clflush_object(obj); - - if (obj->base.pending_write_domain) - cd->flips |= atomic_read(&obj->pending_flip); - - /* The actual obj->write_domain will be updated with - * pending_write_domain after we emit the accumulated flush for all - * of our domain changes in execbuffers (which clears objects' - * write_domains). So if we have a current write domain that we - * aren't changing, set pending_write_domain to that. - */ - if (flush_domains == 0 && obj->base.pending_write_domain == 0) - obj->base.pending_write_domain = obj->base.write_domain; - - cd->invalidate_domains |= invalidate_domains; - cd->flush_domains |= flush_domains; - if (flush_domains & I915_GEM_GPU_DOMAINS) - cd->flush_rings |= intel_ring_flag(obj->ring); - if (invalidate_domains & I915_GEM_GPU_DOMAINS) - cd->flush_rings |= intel_ring_flag(ring); -} - struct eb_objects { int and; struct hlist_head buckets[0]; @@ -587,6 +413,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, obj->base.pending_read_domains = 0; obj->base.pending_write_domain = 0; + obj->pending_fenced_gpu_access = false; } list_splice(&ordered_objects, objects); @@ -810,87 +637,35 @@ err: return ret; } -static void -i915_gem_execbuffer_flush(struct drm_device *dev, - uint32_t invalidate_domains, - uint32_t flush_domains) -{ - if (flush_domains & I915_GEM_DOMAIN_CPU) - intel_gtt_chipset_flush(); - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); -} - -static int -i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips) -{ - u32 plane, flip_mask; - int ret; - - /* Check for any pending flips. As we only maintain a flip queue depth - * of 1, we can simply insert a WAIT for the next display flip prior - * to executing the batch and avoid stalling the CPU. 
- */ - - for (plane = 0; flips >> plane; plane++) { - if (((flips >> plane) & 1) == 0) - continue; - - if (plane) - flip_mask = MI_WAIT_FOR_PLANE_B_FLIP; - else - flip_mask = MI_WAIT_FOR_PLANE_A_FLIP; - - ret = intel_ring_begin(ring, 2); - if (ret) - return ret; - - intel_ring_emit(ring, MI_WAIT_FOR_EVENT | flip_mask); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - } - - return 0; -} - - static int i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, struct list_head *objects) { struct drm_i915_gem_object *obj; - struct change_domains cd; + uint32_t flush_domains = 0; int ret; - memset(&cd, 0, sizeof(cd)); - list_for_each_entry(obj, objects, exec_list) - i915_gem_object_set_to_gpu_domain(obj, ring, &cd); - - if (cd.invalidate_domains | cd.flush_domains) { - i915_gem_execbuffer_flush(ring->dev, - cd.invalidate_domains, - cd.flush_domains); - } - - if (cd.flips) { - ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips); - if (ret) - return ret; - } - list_for_each_entry(obj, objects, exec_list) { ret = i915_gem_object_sync(obj, ring); if (ret) return ret; + + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) + i915_gem_clflush_object(obj); + + flush_domains |= obj->base.write_domain; } - /* Unconditionally invalidate gpu caches. */ - ret = i915_gem_flush_ring(ring, I915_GEM_GPU_DOMAINS, 0); - if (ret) - return ret; + if (flush_domains & I915_GEM_DOMAIN_CPU) + intel_gtt_chipset_flush(); - return 0; + if (flush_domains & I915_GEM_DOMAIN_GTT) + wmb(); + + /* Unconditionally invalidate gpu caches and ensure that we do flush + * any residual writes from the previous batch. + */ + return intel_ring_invalidate_all_caches(ring); } static bool @@ -938,23 +713,14 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects, struct drm_i915_gem_object *obj; list_for_each_entry(obj, objects, exec_list) { - u32 old_read = obj->base.read_domains; - u32 old_write = obj->base.write_domain; - + u32 old_read = obj->base.read_domains; + u32 old_write = obj->base.write_domain; obj->base.read_domains = obj->base.pending_read_domains; obj->base.write_domain = obj->base.pending_write_domain; obj->fenced_gpu_access = obj->pending_fenced_gpu_access; i915_gem_object_move_to_active(obj, ring, seqno); - if (obj->base.write_domain) { - obj->dirty = 1; - obj->pending_gpu_write = true; - list_move_tail(&obj->gpu_write_list, - &ring->gpu_write_list); - if (obj->pin_count) /* check for potential scanout */ - intel_mark_busy(ring->dev, obj); - } trace_i915_gem_object_change_domain(obj, old_read, old_write); } @@ -967,16 +733,11 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev, struct drm_file *file, struct intel_ring_buffer *ring) { - struct drm_i915_gem_request *request; - /* Unconditionally force add_request to emit a full flush. 
*/ ring->gpu_caches_dirty = true; /* Add a breadcrumb for the completion of the batch buffer */ - request = kzalloc(sizeof(*request), GFP_KERNEL); - if (request == NULL || i915_add_request(ring, file, request)) { - kfree(request); - } + (void)i915_add_request(ring, file, NULL); } static int diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 566f61b..41ed41d 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -950,7 +950,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, { err->size = obj->base.size; err->name = obj->base.name; - err->seqno = obj->last_rendering_seqno; + err->rseqno = obj->last_read_seqno; + err->wseqno = obj->last_write_seqno; err->gtt_offset = obj->gtt_offset; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; @@ -1045,7 +1046,7 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, if (obj->ring != ring) continue; - if (i915_seqno_passed(seqno, obj->last_rendering_seqno)) + if (i915_seqno_passed(seqno, obj->last_read_seqno)) continue; if ((obj->base.read_domains & I915_GEM_DOMAIN_COMMAND) == 0) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ddc4859..81698cf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1001,7 +1001,6 @@ static int intel_init_ring_buffer(struct drm_device *dev, ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); - INIT_LIST_HEAD(&ring->gpu_write_list); ring->size = 32 * PAGE_SIZE; init_waitqueue_head(&ring->irq_queue); @@ -1472,7 +1471,6 @@ int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size) ring->dev = dev; INIT_LIST_HEAD(&ring->active_list); INIT_LIST_HEAD(&ring->request_list); - INIT_LIST_HEAD(&ring->gpu_write_list); ring->size = size; ring->effective_size = ring->size; @@ -1565,3 +1563,41 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) return intel_init_ring_buffer(dev, ring); } + +int +intel_ring_flush_all_caches(struct intel_ring_buffer *ring) +{ + int ret; + + if (!ring->gpu_caches_dirty) + return 0; + + ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); + if (ret) + return ret; + + trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); + + ring->gpu_caches_dirty = false; + return 0; +} + +int +intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring) +{ + uint32_t flush_domains; + int ret; + + flush_domains = 0; + if (ring->gpu_caches_dirty) + flush_domains = I915_GEM_GPU_DOMAINS; + + ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + if (ret) + return ret; + + trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); + + ring->gpu_caches_dirty = false; + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1d3c81f..8b2b92e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -101,15 +101,6 @@ struct intel_ring_buffer { struct list_head request_list; /** - * List of objects currently pending a GPU write flush. - * - * All elements on this list will belong to either the - * active_list or flushing_list, last_rendering_seqno can - * be used to differentiate between the two elements. - */ - struct list_head gpu_write_list; - - /** * Do we have some not yet emitted requests outstanding? 
	 */
 	u32 outstanding_lazy_request;
 
@@ -204,6 +195,8 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring,
 void intel_ring_advance(struct intel_ring_buffer *ring);
 
 u32 intel_ring_get_seqno(struct intel_ring_buffer *ring);
+int intel_ring_flush_all_caches(struct intel_ring_buffer *ring);
+int intel_ring_invalidate_all_caches(struct intel_ring_buffer *ring);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
-- 
1.7.10.4
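
As a rough illustration of the gpu_caches_dirty bookkeeping this patch relies on (not part of the patch itself), the sketch below models the intended invariant in plain C: execbuffer invalidates the GPU caches before every batch, folding in a flush when a previous batch left dirty caches behind, and add_request flushes any remaining dirty caches before the breadcrumb is emitted. The struct and helpers here are simplified stand-ins for intel_ring_invalidate_all_caches() / intel_ring_flush_all_caches(), not the driver's real API.

/*
 * Standalone sketch of the gpu_caches_dirty state machine.
 * Build with: cc -std=c99 -Wall sketch.c
 */
#include <stdbool.h>
#include <stdio.h>

struct ring {
	bool gpu_caches_dirty;	/* writes still sitting in the GPU caches? */
};

static void emit_flush(struct ring *ring, bool invalidate, bool flush)
{
	/* Stand-in for ring->flush() emitting an MI_FLUSH-style command. */
	printf("flush cmd: invalidate=%d flush=%d\n", invalidate, flush);
}

/* Mirrors intel_ring_invalidate_all_caches(): always invalidate before a
 * batch, and also flush if residual writes from the last batch remain. */
static void invalidate_all_caches(struct ring *ring)
{
	emit_flush(ring, true, ring->gpu_caches_dirty);
	ring->gpu_caches_dirty = false;
}

/* Mirrors intel_ring_flush_all_caches(): only emit a flush when needed. */
static void flush_all_caches(struct ring *ring)
{
	if (!ring->gpu_caches_dirty)
		return;
	emit_flush(ring, false, true);
	ring->gpu_caches_dirty = false;
}

int main(void)
{
	struct ring ring = { .gpu_caches_dirty = false };

	/* execbuffer: invalidate caches on the way into the batch ... */
	invalidate_all_caches(&ring);
	/* ... the batch writes through the GPU caches ... */
	ring.gpu_caches_dirty = true;
	/* ... and add_request flushes before the request breadcrumb. */
	flush_all_caches(&ring);
	return 0;
}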