From 4d0b60444e0db2c6b0ac85d55ef9beb2526e3542 Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Sat, 15 Sep 2012 23:25:34 -0400
Subject: [PATCH 2/2] r300/compiler: Avoid generating MOV instructions for
 invalid IMM swizzles

If an instruction reads from a constant register that contains immediates
using an invalid swizzle, we can avoid generating MOV instructions to fix
up the swizzle by loading the immediates into a different constant
register that can be read using a valid swizzle. This only affects r300
and r400 cards.

For example:

CONST[1] = { -3.5000 3.5000 2.5000 1.5000 }
MAD temp[4].xy, const[0].xy__, const[1].xz__, input[0].xy__;

========== Before this change, this would be lowered to: ====
CONST[1] = { -3.5000 3.5000 2.5000 1.5000 }
MOV temp[0].x, const[1].x___;
MOV temp[0].y, const[1]._z__;
MAD temp[4].xy, const[0].xy__, temp[0].xy__, input[0].xy__;

========== After this change, it is lowered to: =============
CONST[1] = { -3.5000 3.5000 2.5000 1.5000 }
CONST[2] = { 0.0000 -3.5000 2.5000 0.0000 }
MAD temp[4].xy, const[0].xy__, const[2].yz__, input[0].xy__;
============================================================

This change reduces one of the Lightsmark shaders from 133 to 107
instructions.
---
 .../r300/compiler/radeon_dataflow_swizzles.c       |  256 +++++++++++++++++++-
 1 files changed, 255 insertions(+), 1 deletions(-)

diff --git a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
index 3e1528c..14cf2d2 100644
--- a/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
+++ b/src/gallium/drivers/r300/compiler/radeon_dataflow_swizzles.c
@@ -27,7 +27,9 @@
 
 #include "radeon_dataflow.h"
 
+#include "radeon_code.h"
 #include "radeon_compiler.h"
+#include "radeon_compiler_util.h"
 #include "radeon_list.h"
 #include "radeon_swizzle.h"
 #include "radeon_variable.h"
@@ -89,6 +91,250 @@ static void rewrite_source(struct radeon_compiler * c,
 	}
 }
 
+/**
+ * Try to replace a non-native swizzle on an immediate constant source by
+ * loading the immediates into a new constant register, laid out so that a
+ * native swizzle can read them.
+ *
+ * The values are fetched through rc_get_constant_value() using the current
+ * swizzle and negate state of \p reg and stored in a new immediate vec4;
+ * \p reg is then pointed at that constant and reg->Negate is cleared.
+ *
+ * \param c   compiler whose constant table receives the new immediate
+ * \param reg source register, rewritten in place on success
+ * \return 1 if \p reg was rewritten; 0 if it was left untouched because it
+ *         does not hold immediates, uses a ZERO/HALF/ONE swizzle, or two
+ *         channels would need different values in the same component
+ */
+static unsigned try_rewrite_constant(struct radeon_compiler *c,
+					struct rc_src_register *reg)
+{
+	unsigned new_swizzle, chan, swz0, swz1, swz2, found_swizzle;
+	unsigned slots_used = 0;
+	float imms[4] = {0.0f, 0.0f, 0.0f, 0.0f};
+
+	if (!rc_src_reg_is_immediate(c, reg->File, reg->Index)) {
+		return 0;
+	}
+
+	/* Check for constant swizzles */
+	for (chan = 0; chan < 4; chan++) {
+		unsigned swz = GET_SWZ(reg->Swizzle, chan);
+		if (swz == RC_SWIZZLE_ONE || swz == RC_SWIZZLE_ZERO ||
+						swz == RC_SWIZZLE_HALF) {
+			return 0;
+		}
+	}
+
+	/* Find a legal swizzle */
+	new_swizzle = reg->Swizzle;
+
+	found_swizzle = 0;
+	/* This loop attempts to find a native swizzle where all the
+	 * channels are different.  Fully replicated inputs (XXX, YYY,
+	 * ZZZ) are spread out like any other swizzle; nothing in this
+	 * function rules them out, so they are not asserted against. */
+	while (!found_swizzle) {
+		swz0 = GET_SWZ(new_swizzle, 0);
+		swz1 = GET_SWZ(new_swizzle, 1);
+		swz2 = GET_SWZ(new_swizzle, 2);
+
+		/* Swizzle .W. is never legal, and an unused second channel
+		 * needs a concrete component for the cases below. */
+		if (swz1 == RC_SWIZZLE_W || swz1 == RC_SWIZZLE_UNUSED) {
+			/* We chose Z, because there are two non-repeating
+			 * swizzle combinations of the form .Z. There is
+			 * only one combination each for .X. and .Y. */
+			SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
+			continue;
+		}
+
+		/* An unused first channel matches none of the cases below
+		 * and would keep this loop spinning forever.  Start it at
+		 * X; the cases below move it again if necessary. */
+		if (swz0 == RC_SWIZZLE_UNUSED) {
+			SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
+			continue;
+		}
+
+		switch (swz0) {
+		/* X.. */
+		case RC_SWIZZLE_X:
+			/* Legal swizzles that start with X: XYZ, XXX */
+			switch (swz1) {
+			/* XX. */
+			case RC_SWIZZLE_X:
+				/* The new swizzle will be:
+				 * ZXY (XX. => ZX. => ZXY) */
+				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
+				break;
+			/* XY. */
+			case RC_SWIZZLE_Y:
+				/* The new swizzle is XYZ */
+				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Z);
+				found_swizzle = 1;
+				break;
+			/* XZ. */
+			case RC_SWIZZLE_Z:
+				/* XZZ */
+				if (swz2 == RC_SWIZZLE_Z) {
+					/* The new swizzle is XYZ */
+					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Y);
+					found_swizzle = 1;
+				} else { /* XZ[^Z] */
+					/* The new swizzle will be:
+					 * YZX (XZ. => YZ. => YZX) */
+					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Y);
+				}
+				break;
+			/* XW. Should have already been handled. */
+			case RC_SWIZZLE_W:
+				assert(0);
+				break;
+			}
+			break;
+		/* Y.. */
+		case RC_SWIZZLE_Y:
+			/* Legal swizzles that start with Y: YYY, YZX */
+			switch (swz1) {
+			/* YY. */
+			case RC_SWIZZLE_Y:
+				/* The new swizzle will be:
+				 * XYZ (YY. => XY. => XYZ) */
+				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
+				break;
+			/* YZ. */
+			case RC_SWIZZLE_Z:
+				/* The new swizzle is YZX */
+				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_X);
+				found_swizzle = 1;
+				break;
+			/* YX. */
+			case RC_SWIZZLE_X:
+				/* YXX */
+				if (swz2 == RC_SWIZZLE_X) {
+					/* The new swizzle is YZX */
+					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
+					found_swizzle = 1;
+				} else { /* YX[^X] */
+					/* The new swizzle will be:
+					 * ZXY (YX. => ZX. => ZXY) */
+					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_Z);
+				}
+				break;
+			/* YW. Should have already been handled. */
+			case RC_SWIZZLE_W:
+				assert(0);
+				break;
+			}
+			break;
+		/* Z.. */
+		case RC_SWIZZLE_Z:
+			/* Legal swizzles that start with Z: ZZZ, ZXY */
+			switch (swz1) {
+			/* ZZ. */
+			case RC_SWIZZLE_Z:
+				/* The new swizzle will be:
+				 * WZY (ZZ. => WZ. => WZY) */
+				SET_SWZ(new_swizzle, 0, RC_SWIZZLE_W);
+				break;
+			/* ZX. */
+			case RC_SWIZZLE_X:
+				/* The new swizzle is ZXY */
+				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
+				found_swizzle = 1;
+				break;
+			/* ZY. */
+			case RC_SWIZZLE_Y:
+				/* ZYY */
+				if (swz2 == RC_SWIZZLE_Y) {
+					/* The new swizzle is ZXY */
+					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_X);
+					found_swizzle = 1;
+				} else { /* ZY[^Y] */
+					/* The new swizzle will be:
+					 * XYZ (ZY. => XY. => XYZ) */
+					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
+				}
+				break;
+			/* ZW. Should have already been handled. */
+			case RC_SWIZZLE_W:
+				assert(0);
+				break;
+			}
+			break;
+
+		/* W.. */
+		case RC_SWIZZLE_W:
+			/* Legal swizzles that start with W: WWW, WZY */
+			switch (swz1) {
+			/* WW. Should have already been handled. */
+			case RC_SWIZZLE_W:
+				assert(0);
+				break;
+			/* WZ. */
+			case RC_SWIZZLE_Z:
+				/* The new swizzle will be WZY */
+				SET_SWZ(new_swizzle, 2, RC_SWIZZLE_Y);
+				found_swizzle = 1;
+				break;
+			/* WX. */
+			case RC_SWIZZLE_X:
+			/* WY. */
+			case RC_SWIZZLE_Y:
+				/* W[XY]Y */
+				if (swz2 == RC_SWIZZLE_Y) {
+					/* The new swizzle will be WZY */
+					SET_SWZ(new_swizzle, 1, RC_SWIZZLE_Z);
+					found_swizzle = 1;
+				} else { /* W[XY][^Y] */
+					/* The new swizzle will be:
+					 * ZXY (WX. => XX. => ZX. => ZXY) or
+					 * XYZ (WY. => XY. => XYZ)
+					 */
+					SET_SWZ(new_swizzle, 0, RC_SWIZZLE_X);
+				}
+				break;
+			}
+			break;
+		}
+	}
+
+	/* Lay out the immediates before touching reg, so that bailing out
+	 * is still possible.  Only the first three channels were made
+	 * distinct above; the fourth keeps its component and must not
+	 * overwrite a different value already placed there. */
+	for (chan = 0; chan < 4; chan++) {
+		unsigned new_swz = GET_SWZ(new_swizzle, chan);
+		float value;
+
+		if (GET_SWZ(reg->Swizzle, chan) == RC_SWIZZLE_UNUSED) {
+			continue;
+		}
+		value = rc_get_constant_value(c, reg->Index,
+					reg->Swizzle, reg->Negate, chan);
+		if ((slots_used & (1u << new_swz)) &&
+					imms[new_swz] != value) {
+			return 0;
+		}
+		imms[new_swz] = value;
+		slots_used |= 1u << new_swz;
+	}
+
+	/* Commit: point every used channel at its new component. */
+	for (chan = 0; chan < 4; chan++) {
+		unsigned new_swz = GET_SWZ(new_swizzle, chan);
+
+		if (GET_SWZ(reg->Swizzle, chan) != RC_SWIZZLE_UNUSED) {
+			SET_SWZ(reg->Swizzle, chan, new_swz);
+		}
+	}
+	reg->Index = rc_constants_add_immediate_vec4(&c->Program.Constants,
+									imms);
+	reg->Negate = 0;
+	return 1;
+}
+
 void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
 {
 	struct rc_list *var_ptr;
@@ -104,8 +350,16 @@ void rc_dataflow_swizzles(struct radeon_compiler * c, void *user)
 			const struct rc_opcode_info *opcode_info =
 					rc_get_opcode_info(inst->U.I.Opcode);
 			for(src = 0; src < opcode_info->NumSrcRegs; ++src) {
+				struct rc_src_register *reg =
+						&inst->U.I.SrcReg[src];
 				if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode,
-								inst->U.I.SrcReg[src])) {
+								*reg)) {
+					if (reg->File == RC_FILE_CONSTANT &&
+					    !c->is_r500 &&
+					    c->Program.Constants.Count < R300_PFS_NUM_CONST_REGS &&
+					    try_rewrite_constant(c, reg)) {
+						continue;
+					}
 					rewrite_source(c, inst, src);
 				}
 			}
-- 
1.7.8.6