From 1f1b991e09fdf6294de4d2ff9d84f56bf1c1b032 Mon Sep 17 00:00:00 2001
From: Cody Northrop
Date: Thu, 12 Jun 2014 09:07:18 -0600
Subject: [PATCH] i965/fs: Update discard jump to preserve uniform loads via sampler.

The series that implemented this optimization was done before
the changes to use samplers for uniform loads. Uniform sampler
loads use special execution masks and only populate four
channels, so we can't jump over those or corruption ensues.

Use a more conservative jump mask which only jumps to the end
if all relevant channels are disabled.

No change was observed in GLbenchmark 2.7, so the optimization
is preserved.

Signed-off-by: Cody Northrop
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8858852..7e7ac9d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1907,7 +1907,15 @@ fs_visitor::visit(ir_discard *ir)
        */
       fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
       discard_jump->flag_subreg = 1;
-      discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
+
+      /* Due to how uniforms are loaded with texture sampling, which does
+       * not respect the predicate mask, we can only jump if all relevant
+       * channels are dead. This is more conservative than the previous
+       * 4 channel checking, but still preserves speedups.
+       */
+      discard_jump->predicate = (8 == dispatch_width)
+                                ? BRW_PREDICATE_ALIGN1_ANY8H
+                                : BRW_PREDICATE_ALIGN1_ANY16H;
       discard_jump->predicate_inverse = true;
    }
 }
-- 
1.8.3.2