From 871671c4ab8ff00e85b434865e8855fc356efa8f Mon Sep 17 00:00:00 2001
From: Cody Northrop
Date: Thu, 12 Jun 2014 09:07:18 -0600
Subject: [PATCH] i965/fs: Update discard jump to preserve uniform loads via
 sampler.

The series that implemented this optimization was done before
the changes to use samplers for uniform loads. Uniform sampler
loads use special execution masks and only populate four
channels, so we can't jump over those or corruption ensues.
Use a more conservative jump mask which only jumps to the end
if all relevant channels are disabled.

No change was observed in GLbenchmark 2.7, so the optimization
is preserved.

Signed-off-by: Cody Northrop
Reviewed-by: Mike Stroyan
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=79948
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8858852..fe05715 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1907,7 +1907,15 @@ fs_visitor::visit(ir_discard *ir)
        */
       fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
       discard_jump->flag_subreg = 1;
-      discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
+
+      /* Uniforms are now loaded using samplers with a routine that has
+       * its own execution mask, so we can only jump if all relevant
+       * channels are dead. This is more conservative than the previous
+       * four channel checking, but still preserves speedups.
+       */
+      discard_jump->predicate = (8 == dispatch_width)
+                                ? BRW_PREDICATE_ALIGN1_ANY8H
+                                : BRW_PREDICATE_ALIGN1_ANY16H;
       discard_jump->predicate_inverse = true;
    }
 }
-- 
1.8.3.2