From 5d1d5231bbc73f2ad0600727fa83619402f75800 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 4 Sep 2013 00:17:36 +0100 Subject: [PATCH] drm/i915: Hold an object reference whilst we shrink it Whilst running the shrinker, we need to hold a reference as we unbind the objects, or else we may end up waiting for and retiring requests, which in turn may result in this object being freed. This is very similar to the eviction code which also has to be very careful to keep a reference to its objects as it retires and unbinds them. Another similarity, that Ben pointed out, is that as we may call retire-requests, the bound_list is outside of our control. We must only process a single element of that list at a time, that is, we cannot rely on the "safe" next pointer being valid after a call to i915_vma_unbind(). Sep 3 00:12:19 x-hswu33 kernel: [22027.290905] BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 Sep 3 00:12:19 x-hswu33 kernel: [22027.290971] IP: [] i915_gem_gtt_finish_object+0x68/0xbd [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.291047] PGD 758d3067 PUD ac0d6067 PMD 0 Sep 3 00:12:19 x-hswu33 kernel: [22027.291085] Oops: 0000 [#1] SMP Sep 3 00:12:19 x-hswu33 kernel: [22027.291110] Modules linked in: dm_mod snd_hda_codec_realtek iTCO_wdt iTCO_vendor_support pcspkr snd_hda_intel i2c_i801 snd_hda_codec snd_hwdep snd_pcm snd_page_alloc snd_timer snd lpc_ich mfd_core soundcore battery ac option usb_wwan usbserial uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_core videodev i915 video button drm_kms_helper drm acpi_cpufreq mperf freq_table Sep 3 00:12:19 x-hswu33 kernel: [22027.291392] CPU: 1 PID: 16835 Comm: fbo-maxsize Not tainted 3.11.0-rc7_nightlytop_8fdad4_20130902_+ #7977 Sep 3 00:12:19 x-hswu33 kernel: [22027.291460] task: ffff8800712106d0 ti: ffff880028e4a000 task.ti: ffff880028e4a000 Sep 3 00:12:19 x-hswu33 kernel: [22027.291518] RIP: 0010:[] [] i915_gem_gtt_finish_object+0x68/0xbd [i915] Sep 3 00:12:19 x-hswu33 kernel: 
[22027.291602] RSP: 0018:ffff880028e4b9e8 EFLAGS: 00010246 Sep 3 00:12:19 x-hswu33 kernel: [22027.291640] RAX: 0000000000000000 RBX: ffff880145734000 RCX: ffff880145735328 Sep 3 00:12:19 x-hswu33 kernel: [22027.291689] RDX: ffff8801457353fc RSI: 0000000000000000 RDI: ffff88007597cc00 Sep 3 00:12:19 x-hswu33 kernel: [22027.291738] RBP: ffff88007597cc00 R08: 0000000000000001 R09: ffff88014f257f00 Sep 3 00:12:19 x-hswu33 kernel: [22027.291787] R10: ffffea0001d65f00 R11: 0000000000bba60b R12: ffff880149e5b000 Sep 3 00:12:19 x-hswu33 kernel: [22027.291837] R13: ffff880145734001 R14: ffff88007597ccc8 R15: ffff88007597cc00 Sep 3 00:12:19 x-hswu33 kernel: [22027.291887] FS: 00007ff5bc919740(0000) GS:ffff88014f240000(0000) knlGS:0000000000000000 Sep 3 00:12:19 x-hswu33 kernel: [22027.291943] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 Sep 3 00:12:19 x-hswu33 kernel: [22027.291983] CR2: 0000000000000008 CR3: 0000000028f4c000 CR4: 00000000001407e0 Sep 3 00:12:19 x-hswu33 kernel: [22027.292032] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 Sep 3 00:12:19 x-hswu33 kernel: [22027.292081] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Sep 3 00:12:19 x-hswu33 kernel: [22027.292130] Stack: Sep 3 00:12:19 x-hswu33 kernel: [22027.292145] 0000000000000000 ffff88007597cc00 ffff8801440d6840 0000000000000000 Sep 3 00:12:19 x-hswu33 kernel: [22027.292202] ffff880145734000 ffffffffa007c854 0000000000000010 ffff88007597c900 Sep 3 00:12:19 x-hswu33 kernel: [22027.292261] 0000000000018000 00000000004a1201 ffff88007597cc60 ffffffffa007d183 Sep 3 00:12:19 x-hswu33 kernel: [22027.292317] Call Trace: Sep 3 00:12:19 x-hswu33 kernel: [22027.292350] [] ? i915_vma_unbind+0xe2/0x1d1 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292410] [] ? __i915_gem_shrink+0xf1/0x162 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292464] [] ? i915_gem_object_get_pages_gtt+0xfa/0x303 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292527] [] ? 
i915_gem_object_get_pages+0x54/0x89 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292586] [] ? i915_gem_object_pin+0x238/0x5ce [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292638] [] ? __sg_page_iter_next+0x2b/0x58 Sep 3 00:12:19 x-hswu33 kernel: [22027.292694] [] ? gen6_ppgtt_insert_entries+0xf2/0x114 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292754] [] ? i915_gem_execbuffer_reserve_vma.isra.13+0x79/0x18d [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292820] [] ? i915_gem_execbuffer_reserve+0x21d/0x347 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292881] [] ? i915_gem_do_execbuffer.isra.17+0x4f3/0xe61 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.292943] [] ? i915_gem_object_get_pages+0x54/0x89 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.293002] [] ? i915_gem_pwrite_ioctl+0x743/0x7a5 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.293060] [] ? i915_gem_execbuffer2+0x15e/0x1e4 [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.293123] [] ? drm_ioctl+0x2a5/0x3c4 [drm] Sep 3 00:12:19 x-hswu33 kernel: [22027.293173] [] ? i915_gem_execbuffer+0x37f/0x37f [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.293224] [] ? __do_page_fault+0x3ab/0x449 Sep 3 00:12:19 x-hswu33 kernel: [22027.293269] [] ? do_mmap_pgoff+0x2b2/0x341 Sep 3 00:12:19 x-hswu33 kernel: [22027.293317] [] ? vfs_ioctl+0x1e/0x31 Sep 3 00:12:19 x-hswu33 kernel: [22027.293354] [] ? do_vfs_ioctl+0x3ad/0x3ef Sep 3 00:12:19 x-hswu33 kernel: [22027.293396] [] ? SyS_ioctl+0x4e/0x7e Sep 3 00:12:19 x-hswu33 kernel: [22027.293435] [] ? 
system_call_fastpath+0x16/0x1b Sep 3 00:12:19 x-hswu33 kernel: [22027.293478] Code: 52 0c a0 48 c7 c6 22 30 0d a0 31 c0 e8 ef 00 f9 ff bf c6 a7 00 00 e8 90 5d 24 e1 f6 85 13 01 00 00 10 75 44 48 8b 85 18 01 00 00 <8b> 50 08 48 8b 30 49 8b 84 24 88 02 00 00 48 89 c7 48 81 c7 98 Sep 3 00:12:19 x-hswu33 kernel: [22027.293678] RIP [] i915_gem_gtt_finish_object+0x68/0xbd [i915] Sep 3 00:12:19 x-hswu33 kernel: [22027.293746] RSP Sep 3 00:12:19 x-hswu33 kernel: [22027.293773] CR2: 0000000000000008 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=68171 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8e414cf..d8ef227 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1839,6 +1839,7 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, { struct drm_i915_gem_object *obj, *next; unsigned long count = 0; + struct list_head still_bound_list; lockdep_assert_held(&dev_priv->dev->struct_mutex); @@ -1853,23 +1854,44 @@ __i915_gem_shrink(struct drm_i915_private *dev_priv, } } - list_for_each_entry_safe(obj, next, &dev_priv->mm.bound_list, - global_list) { + /* As we may completely rewrite the bound list whilst unbinding + * (due to retiring requests) we have to strictly process only + * one element of the list at the time, and recheck the list + * on every iteration. 
+ */ + INIT_LIST_HEAD(&still_bound_list); + while (count < target && !list_empty(&dev_priv->mm.bound_list)) { struct i915_vma *vma, *v; - if (!i915_gem_object_is_purgeable(obj) && purgeable_only) + obj = list_first_entry(&dev_priv->mm.bound_list, + typeof(*obj), global_list); + + if (!i915_gem_object_is_purgeable(obj) && purgeable_only) { + list_move_tail(&obj->global_list, &still_bound_list); continue; + } + + /* Hold a reference whilst we unbind this object, as we may + * end up waiting for and retiring requests, which may result + * in this object being freed. + * + * Note that the shrinker and eviction is special as they operate + * on the inactive lists which reference limbo objects. + */ + drm_gem_object_reference(&obj->base); list_for_each_entry_safe(vma, v, &obj->vma_list, vma_link) if (i915_vma_unbind(vma)) break; - if (!i915_gem_object_put_pages(obj)) { + if (i915_gem_object_put_pages(obj) == 0) count += obj->base.size >> PAGE_SHIFT; - if (count >= target) - return count; - } + else + list_move_tail(&obj->global_list, &still_bound_list); + + drm_gem_object_unreference(&obj->base); } + list_splice(&still_bound_list, &dev_priv->mm.bound_list); return count; } -- 1.8.4.rc3