r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'lower control flow opcodes' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: DRM version: 2.38.0, Name: ATI RV530, ID: 0x71c5, GB: 1, Z: 2 r300: GART size: 509 MB, VRAM size: 256 MB r300: AA compression RAM: YES, Z compression RAM: YES, HiZ RAM: YES r300: Initial vertex program VERT DCL IN[0] DCL OUT[0], POSITION 0: MOV OUT[0], IN[0] 1: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[0], temp[0]; 2: MOV output[1], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[0], temp[0]; 2: MOV output[1], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[0], temp[0]; 2: MOV output[1], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[0], temp[0]; 2: MOV output[1], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; 1: MOV output[1], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[0], input[0]; 1: MOV output[1], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[0], input[0]; 1: MOV output[1], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; 1: MOV output[1], input[0]; Vertex Program: after 'lower control flow opcodes' # Radeon Compiler Program 0: MOV output[0], input[0]; 1: MOV output[1], input[0]; Final vertex program code: 0: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 1: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 8.0000, 4.0000, 1.0000} 0: MOV TEMP[0].x, IMM[0].xxxx 1: BGNLOOP :0 2: SGE TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy 3: IF TEMP[1].xxxx :0 4: BRK 5: ENDIF 6: SLT TEMP[2].x, IMM[0].zzzz, TEMP[0].xxxx 7: IF TEMP[2].xxxx :0 8: MOV TEMP[3], IMM[0].xwxx 9: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 10: ELSE :0 11: MOV TEMP[3], IMM[0].wxxx 12: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 13: ENDIF 14: ENDLOOP :0 15: MOV OUT[0], TEMP[3] 16: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV temp[0].x, const[0].xxxx; 1: BGNLOOP; 2: SGE temp[1].x, temp[0].xxxx, const[0].yyyy; 3: IF temp[1].xxxx; 4: BRK; 5: ENDIF; 6: SLT temp[2].x, const[0].zzzz, temp[0].xxxx; 7: IF temp[2].xxxx; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV temp[0].x, const[0].xxxx; 1: BGNLOOP; 2: SGE temp[1].x, temp[0].xxxx, const[0].yyyy; 3: IF temp[1].xxxx; 4: BRK; 5: ENDIF; 6: SLT temp[2].x, const[0].zzzz, temp[0].xxxx; 7: IF temp[2].xxxx; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV temp[0].x, const[0].xxxx; 1: BGNLOOP; 2: SGE temp[1].x, temp[0].xxxx, const[0].yyyy; 3: IF temp[1].xxxx; 4: BRK; 5: ENDIF; 6: SLT temp[2].x, const[0].zzzz, temp[0].xxxx; 7: IF temp[2].xxxx; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'unroll loops' # Radeon Compiler Program 0: MOV temp[0].x, const[0].xxxx; 1: BGNLOOP; 2: SGE temp[1].x, temp[0].xxxx, const[0].yyyy; 3: IF temp[1].xxxx; 4: BRK; 5: ENDIF; 6: SLT temp[2].x, const[0].zzzz, temp[0].xxxx; 7: IF temp[2].xxxx; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV temp[0].x, const[0].xxxx; 1: BGNLOOP; 2: SGE temp[1].x, temp[0].xxxx, const[0].yyyy; 3: IF temp[1].xxxx; 4: BRK; 5: ENDIF; 6: SLT temp[2].x, const[0].zzzz, temp[0].xxxx; 7: IF temp[2].xxxx; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'transform IF' # Radeon Compiler Program 0: MOV temp[0].x, const[0].xxxx; 1: BGNLOOP; 2: SUB none., temp[0].xxxx, const[0].yyyy; [aluresult = (x >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: SUB none., const[0].zzzz, temp[0].xxxx; [aluresult = (x < 0)] 7: IF aluresult.x___; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0].x, const[0].xxxx; 1: BGNLOOP; 2: ADD none., temp[0].xxxx, -const[0].yyyy; [aluresult = (x >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: ADD none., const[0].zzzz, -temp[0].xxxx; [aluresult = (x < 0)] 7: IF aluresult.x___; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].xxxx, const[0].wwww; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0].x, const[0].x___; 1: BGNLOOP; 2: ADD none., temp[0].x___, -const[0].y___; [aluresult = (x >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: ADD none., const[0].z___, -temp[0].x___; [aluresult = (x < 0)] 7: IF aluresult.x___; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].x___, const[0].w___; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].x___, const[0].w___; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV temp[0].x, const[0].x___; 1: BGNLOOP; 2: ADD none., temp[0].x___, -const[0].y___; [aluresult = (x >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: ADD none., const[0].z___, -temp[0].x___; [aluresult = (x < 0)] 7: IF aluresult.x___; 8: MOV temp[3], const[0].xwxx; 9: ADD temp[0].x, temp[0].x___, const[0].w___; 10: ELSE; 11: MOV temp[3], const[0].wxxx; 12: ADD temp[0].x, temp[0].x___, const[0].w___; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV temp[0].x, none.0___; 1: BGNLOOP; 2: ADD none., temp[0].x___, -const[0].y___; [aluresult = (x >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: ADD none., const[0].z___, -temp[0].x___; [aluresult = (x < 0)] 7: IF aluresult.x___; 8: MOV temp[3], none.0100; 9: ADD temp[0].x, temp[0].x___, none.1___; 10: ELSE; 11: MOV temp[3], none.1000; 12: ADD temp[0].x, temp[0].x___, none.1___; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'inline literals' # Radeon Compiler Program 0: MOV temp[0].x, none.0___; 1: BGNLOOP; 2: ADD none., temp[0].x___, -8.000000 (0x50).w___; [aluresult = (x >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: ADD none., 4.000000 (0x48).w___, -temp[0].x___; [aluresult = (x < 0)] 7: IF aluresult.x___; 8: MOV temp[3], none.0100; 9: ADD temp[0].x, temp[0].x___, none.1___; 10: ELSE; 11: MOV temp[3], none.1000; 12: ADD temp[0].x, temp[0].x___, none.1___; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; CONST[0] = { 0.0000 8.0000 4.0000 1.0000 } Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV temp[0].x, none.0___; 1: BGNLOOP; 2: ADD none., temp[0].x___, -8.000000 (0x50).w___; [aluresult = (x >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: ADD none., 4.000000 (0x48).w___, -temp[0].x___; [aluresult = (x < 0)] 7: IF aluresult.x___; 8: MOV temp[3], none.0100; 9: ADD temp[0].x, temp[0].x___, none.1___; 10: ELSE; 11: MOV temp[3], none.1000; 12: ADD temp[0].x, temp[0].x___, none.1___; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV temp[0].x, none.0___; 1: BGNLOOP; 2: ADD none., temp[0].x___, -8.000000 (0x50).w___; [aluresult = (x >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: ADD none., 4.000000 (0x48).w___, -temp[0].x___; [aluresult = (x < 0)] 7: IF aluresult.x___; 8: MOV temp[3], none.0100; 9: ADD temp[0].x, temp[0].x___, none.1___; 10: ELSE; 11: MOV temp[3], none.1000; 12: ADD temp[0].x, temp[0].x___, none.1___; 13: ENDIF; 14: ENDLOOP; 15: MOV output[0], temp[3]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: MAD temp[0].x, src0.0__, src0.111, src0.000 1: BGNLOOP; 2: src0.xyz = temp[0], src0.w = 8.000000 (0x50) MAD aluresult, src0.x__, src0.111, -src0.w__ [aluresult = (result >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: src0.xyz = temp[0], src0.w = 4.000000 (0x48) MAD aluresult, src0.w__, src0.111, -src0.x__ [aluresult = (result < 0)] 7: IF aluresult.x___; 8: MAD temp[3].xyz, src0.010, src0.111, src0.000 MAD temp[3].w, src0.0, src0.1, src0.0 9: src0.xyz = temp[0] MAD temp[0].x, src0.x__, src0.111, src0.1__ 10: ELSE; 11: MAD temp[3].xyz, src0.100, src0.111, src0.000 MAD temp[3].w, src0.0, src0.1, src0.0 12: src0.xyz = temp[0] MAD temp[0].x, src0.x__, src0.111, src0.1__ 13: ENDIF; 14: ENDLOOP; 15: src0.xyz = temp[3], src0.w = temp[3] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: MAD temp[0].x, src0.0__, src0.111, src0.000 1: BGNLOOP; 2: src0.xyz = temp[0], src0.w = 8.000000 (0x50) MAD aluresult, src0.x__, src0.111, -src0.w__ [aluresult = (result >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: src0.xyz = temp[0], src0.w = 4.000000 (0x48) MAD aluresult, src0.w__, src0.111, -src0.x__ [aluresult = (result < 0)] 7: IF aluresult.x___; 8: MAD temp[3].xyz, src0.010, src0.111, src0.000 MAD temp[3].w, src0.0, src0.1, src0.0 9: src0.xyz = temp[0] MAD temp[0].x, src0.x__, src0.111, src0.1__ 10: ELSE; 11: MAD temp[3].xyz, src0.100, src0.111, src0.000 MAD temp[3].w, src0.0, src0.1, src0.0 12: src0.xyz = temp[0] MAD temp[0].x, src0.x__, src0.111, src0.1__ 13: ENDIF; 14: ENDLOOP; 15: src0.xyz = temp[3], src0.w = temp[3] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: MAD temp[0].x, src0.0__, src0.111, src0.000 1: BGNLOOP; 2: src0.xyz = temp[0], src0.w = 8.000000 (0x50) MAD aluresult, src0.x__, src0.111, -src0.w__ [aluresult = (result >= 0)] 3: IF aluresult.x___; 4: BRK; 5: ENDIF; 6: src0.xyz = temp[0], src0.w = 4.000000 (0x48) MAD aluresult, src0.w__, src0.111, -src0.x__ [aluresult = (result < 0)] 7: IF aluresult.x___; 8: MAD temp[3].xyz, src0.010, src0.111, src0.000 MAD temp[3].w, src0.0, src0.1, src0.0 9: src0.xyz = temp[0] MAD temp[0].x, src0.x__, src0.111, src0.1__ 10: ELSE; 11: MAD temp[3].xyz, src0.100, src0.111, src0.000 MAD temp[3].w, src0.0, src0.1, src0.0 12: src0.xyz = temp[0] MAD temp[0].x, src0.x__, src0.111, src0.1__ 13: ENDIF; 14: ENDLOOP; 15: src0.xyz = temp[3], src0.w = temp[3] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0