From cd1bfb513bf380c1dbaef23ced66dbd1971cf3cd Mon Sep 17 00:00:00 2001 From: Ben Widawsky Date: Thu, 4 Dec 2014 10:29:19 -0800 Subject: [PATCH] i965/gs: Avoid DW * DW mul The GS has an interesting use for mul. It's essentially used as a fancy mov (in fact, I am not sure why a mov isn't used). The documentation in the function has a very good explanation from Paul on the mechanics. CHV has some quirks with regard to multiplication. While the documentation is somewhat unclear, I've found that demoting the src1 operand in the GS mul solves all the problems. I'd ask that any potential reviewer ignore the other instances of mul for now (I have more patches), and simply make sure that what this patch does is correct. This fixes around 2000 piglit tests on BSW. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=84777 (with many dupes) Signed-off-by: Ben Widawsky --- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 5 ++++- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index b353539..4f60797 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -534,8 +534,11 @@ vec4_generator::generate_gs_set_write_offset(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + assert(src1.file == BRW_IMMEDIATE_VALUE && + src1.type == BRW_REGISTER_TYPE_UD && + src1.dw1.ud <= SHRT_MAX); brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4), - src1); + retype(src1, BRW_REGISTER_TYPE_UW)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index db0e6cc..2d7ae5a 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -388,7 +388,8 @@ vec4_gs_visitor::emit_control_data_bits() */ src_reg per_slot_offset(this, glsl_type::uint_type); emit(SHR(dst_reg(per_slot_offset), dword_index, 2u)); - emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, 1u); + emit(GS_OPCODE_SET_WRITE_OFFSET, mrf_reg, per_slot_offset, + src_reg(1u)); } if (urb_write_flags & BRW_URB_WRITE_USE_CHANNEL_MASKS) { -- 2.1.3