diff --git a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
index fa2e80f..149341d 100644
--- a/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
+++ b/src/gallium/drivers/r300/compiler/radeon_pair_schedule.c
@@ -298,9 +298,53 @@ static void calc_score_deps(struct schedule_instruction * sinst)
 
 #endif
 
-#define NO_READ_TEX_SCORE (1 << 16)
 #define NO_OUTPUT_SCORE (1 << 24)
 
+static void score_no_output(struct schedule_instruction * sinst)
+{
+	assert(sinst->Instruction->Type != RC_INSTRUCTION_NORMAL);
+	if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
+			!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
+		if (sinst->PairedInst) {
+			if (!sinst->PairedInst->Instruction->U.P.
+				RGB.OutputWriteMask
+			&& !sinst->PairedInst->Instruction->U.P.
+				Alpha.OutputWriteMask) {
+				sinst->Score |= NO_OUTPUT_SCORE;
+			}
+
+		} else {
+			sinst->Score |= NO_OUTPUT_SCORE;
+		}
+	}
+}
+
+#define PAIRED_SCORE (1 << 16)
+
+static void calc_score_r300(struct schedule_instruction * sinst)
+{
+	unsigned src_idx;
+
+	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
+		sinst->Score = 0;
+		return;
+	}
+
+	score_no_output(sinst);
+
+	if (sinst->PairedInst) {
+		sinst->Score |= PAIRED_SCORE;
+		return;
+	}
+
+	for (src_idx = 0; src_idx < 4; src_idx++) {
+		sinst->Score += sinst->Instruction->U.P.RGB.Src[src_idx].Used +
+			sinst->Instruction->U.P.Alpha.Src[src_idx].Used;
+	}
+}
+
+#define NO_READ_TEX_SCORE (1 << 16)
+
 static void calc_score_readers(struct schedule_instruction * sinst)
 {
 	if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL) {
@@ -313,20 +357,7 @@ static void calc_score_readers(struct schedule_instruction * sinst)
 		if (get_tex_read_count(sinst) == 0) {
 			sinst->Score |= NO_READ_TEX_SCORE;
 		}
-		if (!sinst->Instruction->U.P.RGB.OutputWriteMask &&
-			!sinst->Instruction->U.P.Alpha.OutputWriteMask) {
-			if (sinst->PairedInst) {
-				if (!sinst->PairedInst->Instruction->U.P.
-					RGB.OutputWriteMask
-				&& !sinst->PairedInst->Instruction->U.P.
-					Alpha.OutputWriteMask) {
-					sinst->Score |= NO_OUTPUT_SCORE;
-				}
-
-			} else {
-				sinst->Score |= NO_OUTPUT_SCORE;
-			}
-		}
+		score_no_output(sinst);
 	}
 }
 
@@ -1073,6 +1104,10 @@ static void emit_instruction(
 #endif
 
 	for (tex_ptr = s->ReadyTEX; tex_ptr; tex_ptr = tex_ptr->NextReady) {
+		if (tex_ptr->Instruction->U.I.Opcode == RC_OPCODE_KIL) {
+			emit_all_tex(s, before);
+			return;
+		}
 		tex_count++;
 	}
 	update_max_score(s, &s->ReadyFullALU, &max_score, &max_inst, &max_list);
@@ -1080,7 +1115,8 @@ static void emit_instruction(
 	update_max_score(s, &s->ReadyAlpha, &max_score, &max_inst, &max_list);
 
 	if (tex_count >= s->max_tex_group || max_score == -1
-		|| (s->TEXCount > 0 && tex_count == s->TEXCount)) {
+		|| (s->TEXCount > 0 && tex_count == s->TEXCount)
+		|| (!s->C->is_r500 && tex_count > 0 && max_score == -1)) {
 		emit_all_tex(s, before);
 	} else {
 
@@ -1317,15 +1353,7 @@ static void schedule_block(struct schedule_state * s,
 	/* Schedule instructions back */
 	while(!s->C->Error &&
 	      (s->ReadyTEX || s->ReadyRGB || s->ReadyAlpha || s->ReadyFullALU)) {
-		if (s->C->is_r500) {
-			emit_instruction(s, end);
-		} else {
-			if (s->ReadyTEX)
-				emit_all_tex(s, end);
-
-			while(!s->C->Error && (s->ReadyFullALU || s->ReadyRGB || s->ReadyAlpha))
-				emit_one_alu(s, end);
-		}
+		emit_instruction(s, end);
 	}
 }
 
@@ -1348,7 +1376,11 @@ void rc_pair_schedule(struct radeon_compiler *cc, void *user)
 	memset(&s, 0, sizeof(s));
 	s.Opt = *opt;
 	s.C = &c->Base;
-	s.CalcScore = calc_score_readers;
+	if (s.C->is_r500) {
+		s.CalcScore = calc_score_readers;
+	} else {
+		s.CalcScore = calc_score_r300;
+	}
 	s.max_tex_group = debug_get_num_option("RADEON_TEX_GROUP", 8);
 	while(inst != &c->Base.Program.Instructions) {
 		struct rc_instruction * first;
diff --git a/src/gallium/drivers/r300/compiler/radeon_program_alu.c b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
index dd1dfb3..c48f936 100644
--- a/src/gallium/drivers/r300/compiler/radeon_program_alu.c
+++
b/src/gallium/drivers/r300/compiler/radeon_program_alu.c
@@ -1165,35 +1165,81 @@ int radeonTransformDeriv(struct radeon_compiler* c,
 }
 
 /**
+ * IF Temp[0].x   -> IF Temp[0].x
+ * ...            -> ...
+ * KILP           -> KIL -abs(Temp[0].x)
+ * ...            -> ...
+ * ENDIF          -> ENDIF
+ *
+ * === OR ===
+ *
  * IF Temp[0].x -\
  * KILP         - > KIL -abs(Temp[0].x)
  * ENDIF        -/
  *
- * This needs to be done in its own pass, because it modifies the instructions
- * before and after KILP.
+ * === OR ===
+ *
+ * IF Temp[0].x   -> IF Temp[0].x
+ * ...            -> ...
+ * ELSE           -> ELSE
+ * ...            -> ...
+ * KILP           -> KIL -abs(Temp[0].x)
+ * ...            -> ...
+ * ENDIF          -> ENDIF
+ *
+ * === OR ===
+ *
+ * KILP -> KIL -none.1111
+ *
+ * This needs to be done in its own pass, because it might modify the
+ * instructions before and after KILP.
  */
 void rc_transform_KILP(struct radeon_compiler * c, void *user)
 {
 	struct rc_instruction * inst;
 	for (inst = c->Program.Instructions.Next;
 			inst != &c->Program.Instructions; inst = inst->Next) {
+		struct rc_instruction * if_inst;
+		unsigned in_if = 0;
 
 		if (inst->U.I.Opcode != RC_OPCODE_KILP)
 			continue;
 
+		for (if_inst = inst->Prev; if_inst != &c->Program.Instructions;
+						if_inst = if_inst->Prev) {
+
+			if (if_inst->U.I.Opcode == RC_OPCODE_IF) {
+				in_if = 1;
+				break;
+			}
+		}
+
 		inst->U.I.Opcode = RC_OPCODE_KIL;
 
-		if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
-				|| inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+		if (!in_if) {
 			inst->U.I.SrcReg[0] = negate(builtin_one);
 		} else {
-
+			/* This should work even if the KILP is inside the ELSE
+			 * block, because -0.0 is considered negative. */
 			inst->U.I.SrcReg[0] =
-				negate(absolute(inst->Prev->U.I.SrcReg[0]));
-			/* Remove IF */
-			rc_remove_instruction(inst->Prev);
-			/* Remove ENDIF */
-			rc_remove_instruction(inst->Next);
+				negate(absolute(if_inst->U.I.SrcReg[0]));
+
+			if (inst->Prev->U.I.Opcode == RC_OPCODE_IF
+				&& inst->Next->U.I.Opcode == RC_OPCODE_ENDIF) {
+
+				/* Optimize the special case:
+				 * IF Temp[0].x
+				 * KILP
+				 * ENDIF
+				 * The KIL now carries the IF condition, so
+				 * the IF/ENDIF pair around it is redundant.
+				 */
+
+				/* Remove IF */
+				rc_remove_instruction(inst->Prev);
+				/* Remove ENDIF */
+				rc_remove_instruction(inst->Next);
+			}
 		}
 	}
 }