commit 0c6a60d4a4a761f42c9c0a6c2061995c4609c977 Author: Glenn Kennard Date: Tue Nov 11 22:21:42 2014 +0100 r600g: Avoid clobbering src with dst if same reg Also use ALU_OP2_MULLO_UINT for UMUL on cayman for consistency with r600/evergreen. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85376 diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index aab4215..f562504 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -2729,6 +2729,7 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) int i, j, k, r; struct r600_bytecode_alu alu; int last_slot = (inst->Dst[0].Register.WriteMask & 0x8) ? 4 : 3; + int treg = r600_get_temp(ctx); for (k = 0; k < last_slot; k++) { if (!(inst->Dst[0].Register.WriteMask & (1 << k))) continue; @@ -2739,7 +2740,8 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) for (j = 0; j < inst->Instruction.NumSrcRegs; j++) { r600_bytecode_src(&alu.src[j], &ctx->src[j], k); } - tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.sel = treg; + alu.dst.chan = k; alu.dst.write = (i == k); if (i == 3) alu.last = 1; @@ -2748,6 +2750,24 @@ static int cayman_mul_int_instr(struct r600_shader_ctx *ctx) return r; } } + + for (k = 0; k < last_slot; k++) { + if (!(inst->Dst[0].Register.WriteMask & (1 << k))) + continue; + + memset(&alu, 0, sizeof(struct r600_bytecode_alu)); + alu.op = ALU_OP1_MOV; + alu.src[0].sel = treg; + alu.src[0].chan = k; + tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst); + alu.dst.write = 1; + if (k == 3) + alu.last = 1; + r = r600_bytecode_add_alu(ctx->bc, &alu); + if (r) + return r; + } + return 0; } @@ -7686,7 +7706,7 @@ static struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] = { {TGSI_OPCODE_UMAX, 0, ALU_OP2_MAX_UINT, tgsi_op2}, {TGSI_OPCODE_UMIN, 0, ALU_OP2_MIN_UINT, tgsi_op2}, {TGSI_OPCODE_UMOD, 0, ALU_OP0_NOP, tgsi_umod}, - {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_INT, cayman_mul_int_instr}, + {TGSI_OPCODE_UMUL, 0, ALU_OP2_MULLO_UINT, cayman_mul_int_instr}, {TGSI_OPCODE_USEQ, 0, ALU_OP2_SETE_INT, tgsi_op2}, {TGSI_OPCODE_USGE, 0, ALU_OP2_SETGE_UINT, tgsi_op2}, {TGSI_OPCODE_USHR, 0, ALU_OP2_LSHR_INT, tgsi_op2},