From 2fad434f63bd226ceb9fbd94d263263e678e1e84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 17 Aug 2017 13:37:06 +0100 Subject: [PATCH] drm/i915: Boost GPU clocks if we miss the pageflip's vblank MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we miss the current vblank because the GPU was busy, that may cause jitter as the frame rate temporarily drops. We try to limit the impact of this by then boosting the GPU clock to deliver the frame as quickly as possible. This was originally done in commit 6ad790c0f5ac ("drm/i915: Boost GPU frequency if we detect outstanding pageflips") but was never forward-ported to atomic and was finally dropped in commit fd3a40242e87 ("drm/i915: Rip out legacy page_flip completion/irq handling"). One of the most typical use-cases for this is a mostly idle desktop. Rendering one frame of the desktop's frontbuffer can easily be accomplished by the GPU running at low frequency, but often exceeds the time budget of the desktop compositor. The result is that animations such as opening the menu, doing a fullscreen switch, or even just trying to move a window around are slow and jerky. We need to respond within a frame to give the best impression of a smooth UX; as a compromise, we instead respond if that first frame misses its goal. The result should be a near-imperceptible initial delay and a smooth animation even starting from idle. The cost, as ever, is that we spend more power than is strictly necessary as we overestimate the required GPU frequency and then try to ramp down. This of course is reactive: too little, too late; nevertheless it is surprisingly effective. 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102199 Signed-off-by: Chris Wilson Cc: Maarten Lankhorst Cc: Ville Syrjälä Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170817123706.6777-1-chris@chris-wilson.co.uk Tested-by: Lyude Paul Reviewed-by: Radoslaw Szwichtenberg (cherry picked from commit 74d290f845d0736bf6b9dd22cd28dd87b270c65f) --- drivers/gpu/drm/i915/intel_display.c | 58 ++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_pm.c | 10 ++++--- 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f8efd20e4a90..ded238f50758 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include "intel_drv.h" @@ -14666,6 +14667,57 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .atomic_destroy_state = intel_crtc_destroy_state, }; +struct wait_rps_boost { + wait_queue_t wait; + + struct drm_crtc *crtc; + struct drm_i915_gem_request *request; +}; + +static int do_rps_boost(wait_queue_t *_wait, unsigned mode, int sync, void *key) +{ + struct wait_rps_boost *wait = container_of(_wait, typeof(*wait), wait); + struct drm_i915_gem_request *rq = wait->request; + + gen6_rps_boost(rq->i915, NULL, rq->emitted_jiffies); + i915_gem_request_put(rq); + + drm_crtc_vblank_put(wait->crtc); + + list_del(&wait->wait.task_list); + kfree(wait); + return 1; +} + +static void add_rps_boost_after_vblank(struct drm_crtc *crtc, + struct fence *fence) +{ + struct wait_rps_boost *wait; + + if (!fence_is_i915(fence)) + return; + + if (INTEL_GEN(to_i915(crtc->dev)) < 6) + return; + + if (drm_crtc_vblank_get(crtc)) + return; + + wait = kmalloc(sizeof(*wait), GFP_KERNEL); + if (!wait) { + drm_crtc_vblank_put(crtc); + return; + } + + wait->request = to_request(fence_get(fence)); + wait->crtc = crtc; + + wait->wait.func = do_rps_boost; + 
wait->wait.flags = 0; + + add_wait_queue(drm_crtc_vblank_waitqueue(crtc), &wait->wait); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for @@ -14749,9 +14801,15 @@ intel_prepare_plane_fb(struct drm_plane *plane, } if (ret == 0) { + struct fence *fence; + to_intel_plane_state(new_state)->wait_req = i915_gem_active_get(&obj->last_write, &obj->base.dev->struct_mutex); + + fence = &to_intel_plane_state(new_state)->wait_req->fence; + if (fence) + add_rps_boost_after_vblank(new_state->crtc, fence); } return ret; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 49de4760cc16..37ef79cd1c72 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5098,6 +5098,8 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, struct intel_rps_client *rps, unsigned long submitted) { + unsigned long flags; + /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ @@ -5112,14 +5114,14 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) rps = NULL; - spin_lock(&dev_priv->rps.client_lock); + spin_lock_irqsave(&dev_priv->rps.client_lock, flags); if (rps == NULL || list_empty(&rps->link)) { - spin_lock_irq(&dev_priv->irq_lock); + spin_lock(&dev_priv->irq_lock); if (dev_priv->rps.interrupts_enabled) { dev_priv->rps.client_boost = true; schedule_work(&dev_priv->rps.work); } - spin_unlock_irq(&dev_priv->irq_lock); + spin_unlock(&dev_priv->irq_lock); if (rps != NULL) { list_add(&rps->link, &dev_priv->rps.clients); @@ -5127,7 +5129,7 @@ void gen6_rps_boost(struct drm_i915_private *dev_priv, } else dev_priv->rps.boosts++; } - spin_unlock(&dev_priv->rps.client_lock); + spin_unlock_irqrestore(&dev_priv->rps.client_lock, flags); } void intel_set_rps(struct drm_i915_private *dev_priv, u8 val) -- 2.14.1