r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=0************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[0], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[0], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00000001 reg: 0i swiz: X/ X/ X/ X src1: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 1: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src1: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 2: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 3: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 4: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 5: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 6: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], COLOR, PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=1************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial fragment program FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[1], input[0], 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[1], input[0], 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0]; 2: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], temp[0].xy__, 2D[0]; 2: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=2************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t0, t0, texture[0] (00008000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial fragment program FRAG DCL IN[0], COLOR, LINEAR DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0] 0: TEX TEMP[0], IN[1].xyyy, SAMP[0], 2D 1: MUL OUT[0], IN[0], TEMP[0] 2: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[1].xy__, 2D[0]; 2: src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], temp[1].xy__, 2D[0]; 2: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = temp[1], src1.w = temp[1] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 pc=3************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t1, t1, texture[0] (00008041) 0: xyz: t0 t1 t0 bias-> o0.xyz (1c000040) w: t0 t1 t0 bias-> o0.w (01000040) xyz: t0.xyz t1.xyz 0.0 op: 00050200 w: t0.w t1.w 0.0 op: 00040509 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..1] 0: MOV OUT[1], IN[1] 1: MOV OUT[2].xy, IN[2].xyxx 2: MUL TEMP[0], CONST[0], IN[0].xxxx 3: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[1] 5: MAD OUT[0], CONST[3], IN[0].wwww, TEMP[0] 6: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[0], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[0], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00304203 dst: 2o op: VE_ADD src0: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 2: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 3: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 4: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 5: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 6: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 7: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=4************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0..1] 0: MOV OUT[1], IN[1] 1: MUL TEMP[0], CONST[0], IN[0].xxxx 2: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[0] 3: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[1] 4: MAD OUT[0], CONST[3], IN[0].wwww, TEMP[0] 5: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[0], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[0], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 3: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 4: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 5: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 6: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..1] DCL TEMP[0..1] 0: TEX TEMP[0], IN[0].xyyy, SAMP[0], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[0].xxxx 2: MUL TEMP[1].xyz, TEMP[0].xyzx, CONST[1].xyzx 3: MOV OUT[0], TEMP[1] 4: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[1].xyz, temp[0].xyzx, const[1].xyzx; 3: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[1].xyz, temp[0].xyzx, const[1].xyzx; 3: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[1].xyz, temp[0].xyzx, const[1].xyzx; 3: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[1].xyz, temp[0].xyzx, const[1].xyzx; 3: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[1].xyz, temp[0].xyzx, const[1].xyzx; 3: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[1].xyz, temp[0].xyzx, const[1].xyzx; 3: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[1].xyz, temp[0].xyzx, const[1].xyzx; 3: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[1].xyz, temp[0].xyz_, const[1].xyz_; 3: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[1].xyz, temp[0].xyz_, const[1].xyz_; 3: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[1].xyz, temp[0].xyz_, const[1].xyz_; 3: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[1].xyz, temp[0].xyz_, const[1].xyz_; 3: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[1].xyz, temp[0].xyz_, const[1].xyz_; 3: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[1].xyz, temp[0].xyz_, const[1].xyz_; 3: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: src0.xyz = const[0], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: src0.xyz = temp[0], src1.xyz = const[1] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 3: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0]; 2: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = const[1], src2.xyz = const[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src2.x, src0.0 3: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], temp[0].xy__, 2D[0]; 2: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = const[1], src2.xyz = const[0] MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src2.x, src0.0 3: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=5************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 1, tex_end: 0 (code_addr: 00400040) TEX: TEX t0, t0, texture[0] (00008000) 0: xyz: t0 c1 c0 bias-> t0.xyz (03820840) w: t0 t0 t0 bias-> t0.w (00800000) xyz: t0.xyz c1.xyz 0.0 op: 00050200 w: t0.w c0.x 0.0 op: 00040309 1: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[1].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: MUL TEMP[0], CONST[4], IN[0].xxxx 5: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 6: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 7: MAD OUT[0], CONST[7], IN[0].wwww, TEMP[0] 8: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[0], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[0]; 9: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[0], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[0]; 9: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00302204 dst: 1o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 5: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 6: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 7: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 8: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 9: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL OUT[0], COLOR DCL CONST[0] 0: MOV OUT[0], CONST[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], const[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = const[0], src0.w = const[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = const[0], src0.w = const[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = const[0], src0.w = const[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=6************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: c0 t0 t0 bias-> o0.xyz (1c000020) w: c0 t0 t0 bias-> o0.w (01000020) xyz: c0.xyz 1.0 0.0 op: 00050a80 w: c0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[1] 3: MAD OUT[0], CONST[3], IN[0].wwww, TEMP[0] 4: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[2], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[2]; 5: MOV output[1], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[0], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[0]; 5: MOV output[1], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0], const[0], input[0].xxxx; 1: MAD temp[1], const[1], input[0].yyyy, temp[0]; 2: MAD temp[0], const[2], input[0].zzzz, temp[1]; 3: MAD temp[0], const[3], input[0].wwww, temp[0]; 4: MOV output[0], temp[0]; 5: MOV output[1], temp[0]; Final vertex program code: 0: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 1: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 2: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 3: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 4: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 5: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL IN[1], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..2] DCL TEMP[0..3] 0: TEX TEMP[0], IN[1].xyyy, SAMP[1], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[0].xxxx 2: TEX TEMP[2].xyz, IN[0].xyyy, SAMP[0], 2D 3: MAD TEMP[3].xyz, TEMP[2].xyzz, CONST[1].xyzz, CONST[2].xyzz 4: MUL TEMP[1].xyz, TEMP[0].xyzx, TEMP[3].xyzx 5: MOV OUT[0], TEMP[1] 6: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[1].xyzz, const[2].xyzz; 4: MUL temp[1].xyz, temp[0].xyzx, temp[3].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[1].xyzz, const[2].xyzz; 4: MUL temp[1].xyz, temp[0].xyzx, temp[3].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[1].xyzz, const[2].xyzz; 4: MUL temp[1].xyz, temp[0].xyzx, temp[3].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[1].xyzz, const[2].xyzz; 4: MUL temp[1].xyz, temp[0].xyzx, temp[3].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[1].xyzz, const[2].xyzz; 4: MUL temp[1].xyz, temp[0].xyzx, temp[3].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[4], input[0].xyyy, 2D[0]; 3: MOV temp[2].xyz, temp[4]; 4: MAD temp[3].xyz, temp[2].xyzz, const[1].xyzz, const[2].xyzz; 5: MUL temp[1].xyz, temp[0].xyzx, temp[3].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[4], input[0].xyyy, 2D[0]; 3: MOV temp[2].xyz, temp[4]; 4: MAD temp[3].xyz, temp[2].xyzz, const[1].xyzz, const[2].xyzz; 5: MUL temp[1].xyz, temp[0].xyzx, temp[3].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MOV temp[2].xyz, temp[4].xyz_; 4: MAD temp[3].xyz, temp[2].xyz_, const[1].xyz_, const[2].xyz_; 5: MUL temp[1].xyz, temp[0].xyz_, temp[3].xyz_; 6: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MOV temp[2].xyz, temp[4].xyz_; 4: MAD temp[3].xyz, temp[2].xyz_, const[1].xyz_, const[2].xyz_; 5: MUL temp[1].xyz, temp[0].xyz_, temp[3].xyz_; 6: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MAD temp[3].xyz, temp[4].xyz_, const[1].xyz_, const[2].xyz_; 4: MUL temp[1].xyz, temp[0].xyz_, temp[3].xyz_; 5: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MAD temp[3].xyz, temp[4].xyz_, const[1].xyz_, const[2].xyz_; 4: MUL temp[1].xyz, temp[0].xyz_, temp[3].xyz_; 5: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MAD temp[3].xyz, temp[4].xyz_, const[1].xyz_, const[2].xyz_; 4: MUL temp[1].xyz, temp[0].xyz_, temp[3].xyz_; 5: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyz_, const[1].xyz_, const[2].xyz_; 4: MUL temp[1].xyz, temp[0].xyz_, temp[3].xyz_; 5: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: src0.xyz = const[0], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: src0.xyz = temp[2], src1.xyz = const[1], src2.xyz = const[2] MAD temp[3].xyz, src0.xyz, src1.xyz, src2.xyz 4: src0.xyz = temp[0], src1.xyz = temp[3] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 5: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[1].xy__, 2D[1]; 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: src0.xyz = temp[2], src1.xyz = const[1], src2.xyz = const[2] MAD temp[3].xyz, src0.xyz, src1.xyz, src2.xyz 4: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = temp[3], src2.xyz = const[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src2.x, src0.0 5: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], temp[1].xy__, 2D[1]; 2: TEX temp[0].xyz, temp[0].xy__, 2D[0]; 3: src0.xyz = temp[0], src1.xyz = const[1], src2.xyz = const[2] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 4: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = temp[0], src2.xyz = const[0] MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src2.x, src0.0 5: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=7************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 2, tex_end: 1 (code_addr: 00420080) TEX: TEX t1, t1, texture[1] (00008841) TEX t0, t0, texture[0] (00008000) 0: xyz: t0 c1 c2 bias-> t0.xyz (03822840) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz c1.xyz c2.xyz op: 00020200 w: t0.x t0.x t0.x op: 00000000 1: xyz: t1 t0 c0 bias-> t0.xyz (03820001) w: t1 t0 t0 bias-> t0.w (00800001) xyz: t1.xyz t0.xyz 0.0 op: 00050200 w: t1.w c0.x 0.0 op: 00040309 2: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL OUT[2], GENERIC[11] DCL CONST[0..7] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[2].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: MOV OUT[1].xy, IN[2].xyxx 5: MUL TEMP[0], CONST[4], IN[0].xxxx 6: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 7: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 8: MAD OUT[0], CONST[7], IN[0].wwww, TEMP[0] 9: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[0], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[0]; 10: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[0], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[0]; 10: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00304204 dst: 2o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00302203 dst: 1o op: VE_ADD src0: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 5: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 6: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 7: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 8: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 9: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 10: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL IN[1], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0..3] DCL TEMP[0..3] 0: TEX TEMP[0], IN[1].xyyy, SAMP[2], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[0].xxxx 2: TEX TEMP[2].xyz, IN[0].xyyy, SAMP[0], 2D 3: MAD TEMP[3].xyz, TEMP[2].xyzz, CONST[2].xyzz, CONST[3].xyzz 4: MUL TEMP[2].xyz, TEMP[0].xyzz, TEMP[3].xyzz 5: TEX TEMP[0].xyz, IN[1].xyyy, SAMP[1], 2D 6: MAD TEMP[1].xyz, TEMP[0].xyzx, CONST[1].xyzx, TEMP[2].xyzx 7: MOV OUT[0], TEMP[1] 8: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, const[3].xyzz; 4: MUL temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 5: TEX temp[0].xyz, input[1].xyyy, 2D[1]; 6: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 7: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, const[3].xyzz; 4: MUL temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 5: TEX temp[0].xyz, input[1].xyyy, 2D[1]; 6: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 7: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, const[3].xyzz; 4: MUL temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 5: TEX temp[0].xyz, input[1].xyyy, 2D[1]; 6: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 7: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, const[3].xyzz; 4: MUL temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 5: TEX temp[0].xyz, input[1].xyyy, 2D[1]; 6: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 7: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, const[3].xyzz; 4: MUL temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 5: TEX temp[0].xyz, input[1].xyyy, 2D[1]; 6: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 7: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[4], input[0].xyyy, 2D[0]; 3: MOV temp[2].xyz, temp[4]; 4: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, const[3].xyzz; 5: MUL temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 6: TEX temp[5], input[1].xyyy, 2D[1]; 7: MOV temp[0].xyz, temp[5]; 8: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 9: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[4], input[0].xyyy, 2D[0]; 3: MOV temp[2].xyz, temp[4]; 4: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, const[3].xyzz; 5: MUL temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 6: TEX temp[5], input[1].xyyy, 2D[1]; 7: MOV temp[0].xyz, temp[5]; 8: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 9: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MOV temp[2].xyz, temp[4].xyz_; 4: MAD temp[3].xyz, temp[2].xyz_, const[2].xyz_, const[3].xyz_; 5: MUL temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 6: TEX temp[5].xyz, input[1].xy__, 2D[1]; 7: MOV temp[0].xyz, temp[5].xyz_; 8: MAD temp[1].xyz, temp[0].xyz_, const[1].xyz_, temp[2].xyz_; 9: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MOV temp[2].xyz, temp[4].xyz_; 4: MAD temp[3].xyz, temp[2].xyz_, const[2].xyz_, const[3].xyz_; 5: MUL temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 6: TEX temp[5].xyz, input[1].xy__, 2D[1]; 7: MOV temp[0].xyz, temp[5].xyz_; 8: MAD temp[1].xyz, temp[0].xyz_, const[1].xyz_, temp[2].xyz_; 9: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MAD temp[3].xyz, temp[4].xyz_, const[2].xyz_, const[3].xyz_; 4: MUL temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 5: TEX temp[5].xyz, input[1].xy__, 2D[1]; 6: MAD temp[1].xyz, temp[5].xyz_, const[1].xyz_, temp[2].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MAD temp[3].xyz, temp[4].xyz_, const[2].xyz_, const[3].xyz_; 4: MUL temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 5: TEX temp[5].xyz, input[1].xy__, 2D[1]; 6: MAD temp[1].xyz, temp[5].xyz_, const[1].xyz_, temp[2].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[4].xyz, input[0].xy__, 2D[0]; 3: MAD temp[3].xyz, temp[4].xyz_, const[2].xyz_, const[3].xyz_; 4: MUL temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 5: TEX temp[5].xyz, input[1].xy__, 2D[1]; 6: MAD temp[1].xyz, temp[5].xyz_, const[1].xyz_, temp[2].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: MAD temp[3].xyz, temp[2].xyz_, const[2].xyz_, const[3].xyz_; 4: MUL temp[4].xyz, temp[0].xyz_, temp[3].xyz_; 5: TEX temp[5].xyz, input[1].xy__, 2D[1]; 6: MAD temp[1].xyz, temp[5].xyz_, const[1].xyz_, temp[4].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[2]; 1: src0.xyz = const[0], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: src0.xyz = temp[2], src1.xyz = const[2], src2.xyz = const[3] MAD temp[3].xyz, src0.xyz, src1.xyz, src2.xyz 4: src0.xyz = temp[0], src1.xyz = temp[3] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 5: TEX temp[5].xyz, input[1].xy__, 2D[1]; 6: src0.xyz = temp[5], src1.xyz = const[1], src2.xyz = temp[4] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 7: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[1].xy__, 2D[2]; 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: TEX temp[5].xyz, input[1].xy__, 2D[1]; 4: src0.xyz = temp[2], src1.xyz = const[2], src2.xyz = const[3] MAD temp[3].xyz, src0.xyz, src1.xyz, src2.xyz 5: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = temp[3], src2.xyz = const[0] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src2.x, src0.0 6: src0.xyz = temp[5], src1.xyz = const[1], src2.xyz = temp[4] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 7: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[2], temp[1].xy__, 2D[2]; 2: TEX temp[0].xyz, temp[0].xy__, 2D[0]; 3: TEX temp[3].xyz, temp[1].xy__, 2D[1]; 4: src0.xyz = temp[0], src1.xyz = const[2], src2.xyz = const[3] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 5: src0.xyz = temp[2], src0.w = temp[2], src1.xyz = temp[0], src2.xyz = const[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src2.x, src0.0 6: src0.xyz = temp[3], src1.xyz = const[1], src2.xyz = temp[1] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 7: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=8************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 3, tex_end: 2 (code_addr: 004400c0) TEX: TEX t2, t1, texture[2] (00009081) TEX t0, t0, texture[0] (00008000) TEX t3, t1, texture[1] (000088c1) 0: xyz: t0 c2 c3 bias-> t0.xyz (03823880) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz c2.xyz c3.xyz op: 00020200 w: t0.x t0.x t0.x op: 00000000 1: xyz: t2 t0 c0 bias-> t1.xyz (03860002) w: t2 t0 t0 bias-> t0.w (00800002) xyz: t2.xyz t0.xyz 0.0 op: 00050200 w: t2.w c0.x 0.0 op: 00040309 2: xyz: t3 c1 t1 bias-> t0.xyz (03801843) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz c1.xyz t1.xyz op: 00020200 w: t3.x t3.x t3.x op: 00000000 3: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL OUT[2], GENERIC[11] DCL CONST[0..7] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[2].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: MOV OUT[1].xy, IN[2].xyxx 5: MUL TEMP[0], CONST[4], IN[0].xxxx 6: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 7: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 8: MAD OUT[0], CONST[7], IN[0].wwww, TEMP[0] 9: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MOV output[1].xy, input[2].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[0], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[0]; 10: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MOV output[1].xy, input[2].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[0], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[0]; 10: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00304204 dst: 2o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00302203 dst: 1o op: VE_ADD src0: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 5: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 6: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 7: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 8: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 9: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 10: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], COLOR, LINEAR DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0] 0: TEX TEMP[0], IN[1].xyyy, SAMP[0], 2D 1: MUL OUT[0], IN[0], TEMP[0] 2: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], input[0], temp[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[1].xy__, 2D[0]; 2: src0.xyz = input[0], src0.w = input[0], src1.xyz = temp[0], src1.w = temp[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], temp[1].xy__, 2D[0]; 2: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = temp[1], src1.w = temp[1] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 pc=9************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t1, t1, texture[0] (00008041) 0: xyz: t0 t1 t0 bias-> o0.xyz (1c000040) w: t0 t1 t0 bias-> o0.w (01000040) xyz: t0.xyz t1.xyz 0.0 op: 00050200 w: t0.w t1.w 0.0 op: 00040509 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[10] DCL CONST[0..3] DCL TEMP[0..1] 0: MOV OUT[1], IN[1] 1: MOV OUT[2].xy, IN[2].xyxx 2: MUL TEMP[0], CONST[0], IN[0].xxxx 3: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[0] 4: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[1] 5: MAD OUT[0], CONST[3], IN[0].wwww, TEMP[0] 6: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xyxx; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[2], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[2]; 7: MOV output[3], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[0], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: MUL temp[0], const[0], input[0].xxxx; 3: MAD temp[1], const[1], input[0].yyyy, temp[0]; 4: MAD temp[0], const[2], input[0].zzzz, temp[1]; 5: MAD temp[0], const[3], input[0].wwww, temp[0]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00304203 dst: 2o op: VE_ADD src0: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 2: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 3: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 4: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 5: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 6: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 7: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], COLOR, LINEAR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=10************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0..1] 0: MOV OUT[1], IN[1] 1: MUL TEMP[0], CONST[0], IN[0].xxxx 2: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[0] 3: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[1] 4: MAD OUT[0], CONST[3], IN[0].wwww, TEMP[0] 5: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[2], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[2]; 6: MOV output[2], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[0], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[0].xxxx; 2: MAD temp[1], const[1], input[0].yyyy, temp[0]; 3: MAD temp[0], const[2], input[0].zzzz, temp[1]; 4: MAD temp[0], const[3], input[0].wwww, temp[0]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 3: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 4: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 5: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 6: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], COLOR, PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=11************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..2] DCL TEMP[0..2] 0: TEX TEMP[0], IN[0].xyyy, SAMP[1], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[0].xxxx 2: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[2].xyzz 3: TEX TEMP[0].xyz, IN[0].xyyy, SAMP[0], 2D 4: MAD TEMP[1].xyz, TEMP[0].xyzx, CONST[1].xyzx, TEMP[2].xyzx 5: MOV OUT[0], TEMP[1] 6: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[2].xyz, temp[0].xyzz, const[2].xyzz; 3: TEX temp[0].xyz, input[0].xyyy, 2D[0]; 4: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[2].xyz, temp[0].xyzz, const[2].xyzz; 3: TEX temp[0].xyz, input[0].xyyy, 2D[0]; 4: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[2].xyz, temp[0].xyzz, const[2].xyzz; 3: TEX temp[0].xyz, input[0].xyyy, 2D[0]; 4: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[2].xyz, temp[0].xyzz, const[2].xyzz; 3: TEX temp[0].xyz, input[0].xyyy, 2D[0]; 4: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[2].xyz, temp[0].xyzz, const[2].xyzz; 3: TEX temp[0].xyz, input[0].xyyy, 2D[0]; 4: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 5: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[2].xyz, temp[0].xyzz, const[2].xyzz; 3: TEX temp[3], input[0].xyyy, 2D[0]; 4: MOV temp[0].xyz, temp[3]; 5: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MUL temp[2].xyz, temp[0].xyzz, const[2].xyzz; 3: TEX temp[3], input[0].xyyy, 2D[0]; 4: MOV temp[0].xyz, temp[3]; 5: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[2].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[2].xyz, temp[0].xyz_, const[2].xyz_; 3: TEX temp[3].xyz, input[0].xy__, 2D[0]; 4: MOV temp[0].xyz, temp[3].xyz_; 5: MAD temp[1].xyz, temp[0].xyz_, const[1].xyz_, temp[2].xyz_; 6: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[2].xyz, temp[0].xyz_, const[2].xyz_; 3: TEX temp[3].xyz, input[0].xy__, 2D[0]; 4: MOV temp[0].xyz, temp[3].xyz_; 5: MAD temp[1].xyz, temp[0].xyz_, const[1].xyz_, temp[2].xyz_; 6: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[2].xyz, temp[0].xyz_, const[2].xyz_; 3: TEX temp[3].xyz, input[0].xy__, 2D[0]; 4: MAD temp[1].xyz, temp[3].xyz_, const[1].xyz_, temp[2].xyz_; 5: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[2].xyz, temp[0].xyz_, const[2].xyz_; 3: TEX temp[3].xyz, input[0].xy__, 2D[0]; 4: MAD temp[1].xyz, temp[3].xyz_, const[1].xyz_, temp[2].xyz_; 5: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[2].xyz, temp[0].xyz_, const[2].xyz_; 3: TEX temp[3].xyz, input[0].xy__, 2D[0]; 4: MAD temp[1].xyz, temp[3].xyz_, const[1].xyz_, temp[2].xyz_; 5: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MUL temp[2].xyz, temp[0].xyz_, const[2].xyz_; 3: TEX temp[3].xyz, input[0].xy__, 2D[0]; 4: MAD temp[1].xyz, temp[3].xyz_, const[1].xyz_, temp[2].xyz_; 5: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[1]; 1: src0.xyz = const[0], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: src0.xyz = temp[0], src1.xyz = const[2] MAD temp[2].xyz, src0.xyz, src1.xyz, src0.000 3: TEX temp[3].xyz, input[0].xy__, 2D[0]; 4: src0.xyz = temp[3], src1.xyz = const[1], src2.xyz = temp[2] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 5: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[1]; 2: TEX temp[3].xyz, input[0].xy__, 2D[0]; 3: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = const[2], src2.xyz = const[0] MAD temp[2].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src2.x, src0.0 4: src0.xyz = temp[3], src1.xyz = const[1], src2.xyz = temp[2] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 5: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], temp[0].xy__, 2D[1]; 2: TEX temp[2].xyz, temp[0].xy__, 2D[0]; 3: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = const[2], src2.xyz = const[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src2.x, src0.0 4: src0.xyz = temp[2], src1.xyz = const[1], src2.xyz = temp[1] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 5: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=12************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 2, tex_end: 1 (code_addr: 00420080) TEX: TEX t1, t0, texture[1] (00008840) TEX t2, t0, texture[0] (00008080) 0: xyz: t1 c2 c0 bias-> t1.xyz (03860881) w: t1 t0 t0 bias-> t0.w (00800001) xyz: t1.xyz c2.xyz 0.0 op: 00050200 w: t1.w c0.x 0.0 op: 00040309 1: xyz: t2 c1 t1 bias-> t0.xyz (03801842) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz c1.xyz t1.xyz op: 00020200 w: t2.x t2.x t2.x op: 00000000 2: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[1].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: MUL TEMP[0], CONST[4], IN[0].xxxx 5: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 6: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 7: MAD OUT[0], CONST[7], IN[0].wwww, TEMP[0] 8: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[0], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[0]; 9: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[0], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[0]; 9: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00302204 dst: 1o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 5: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 6: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 7: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 8: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 9: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[1], input[0], 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[1], input[0], 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0]; 2: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], temp[0].xy__, 2D[0]; 2: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=13************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t0, t0, texture[0] (00008000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial fragment program FRAG DCL IN[0], COLOR, LINEAR DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..2] DCL TEMP[0..2] 0: TEX TEMP[0], IN[1].xyyy, SAMP[0], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[0].xxxx 2: MAD TEMP[2].xyz, IN[0].xyzz, CONST[1].xyzz, CONST[2].xyzz 3: MUL TEMP[1].xyz, TEMP[0].xyzx, TEMP[2].xyzx 4: MOV OUT[0], TEMP[1] 5: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[1].xyzz, const[2].xyzz; 3: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 4: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[1].xyzz, const[2].xyzz; 3: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 4: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[1].xyzz, const[2].xyzz; 3: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 4: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[1].xyzz, const[2].xyzz; 3: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 4: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[1].xyzz, const[2].xyzz; 3: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 4: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[1].xyzz, const[2].xyzz; 3: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 4: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[1].xyzz, const[2].xyzz; 3: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 4: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[1].xyz_, const[2].xyz_; 3: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 4: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[1].xyz_, const[2].xyz_; 3: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 4: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[1].xyz_, const[2].xyz_; 3: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 4: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[1].xyz_, const[2].xyz_; 3: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 4: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[1].xyz_, const[2].xyz_; 3: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 4: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[1].xyz_, const[2].xyz_; 3: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 4: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: src0.xyz = const[0], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: src0.xyz = input[0], src1.xyz = const[1], src2.xyz = const[2] MAD temp[2].xyz, src0.xyz, src1.xyz, src2.xyz 3: src0.xyz = temp[0], src1.xyz = temp[2] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 4: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[1].xy__, 2D[0]; 2: src0.xyz = input[0], src1.xyz = const[1], src2.xyz = const[2] MAD temp[2].xyz, src0.xyz, src1.xyz, src2.xyz 3: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = temp[2], src2.xyz = const[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src2.x, src0.0 4: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], temp[1].xy__, 2D[0]; 2: src0.xyz = temp[0], src1.xyz = const[1], src2.xyz = const[2] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 3: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = temp[0], src2.xyz = const[0] MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src2.x, src0.0 4: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=14************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 2, tex_end: 0 (code_addr: 00400080) TEX: TEX t1, t1, texture[0] (00008041) 0: xyz: t0 c1 c2 bias-> t0.xyz (03822840) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz c1.xyz c2.xyz op: 00020200 w: t0.x t0.x t0.x op: 00000000 1: xyz: t1 t0 c0 bias-> t0.xyz (03820001) w: t1 t0 t0 bias-> t0.w (00800001) xyz: t1.xyz t0.xyz 0.0 op: 00050200 w: t1.w c0.x 0.0 op: 00040309 2: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1] 0: MOV OUT[1], IN[1] 1: MUL TEMP[0], CONST[0], IN[2].xxxx 2: MAD TEMP[1], CONST[1], IN[2].yyyy, TEMP[0] 3: MAD TEMP[0], CONST[2], IN[2].zzzz, TEMP[1] 4: MAD OUT[2].xy, CONST[3].xyxx, IN[2].wwxx, TEMP[0].xyxx 5: MUL TEMP[0], CONST[4], IN[0].xxxx 6: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 7: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 8: MAD OUT[0], CONST[7], IN[0].wwww, TEMP[0] 9: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[0], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[0]; 10: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[0], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[0]; 10: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80041 reg: 2i swiz: X/ X/ U/ U src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 2: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92041 reg: 2i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 3: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4041 reg: 2i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 4: op: 0x00304204 dst: 2o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6041 reg: 2i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 5: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 6: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 7: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 8: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 9: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 10: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], COLOR, LINEAR DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..3] 0: TEX TEMP[0], IN[1].xyyy, SAMP[1], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[0].xxxx 2: MAD TEMP[2].xyz, IN[0].xyzz, CONST[2].xyzz, CONST[3].xyzz 3: MUL TEMP[3].xyz, TEMP[0].xyzz, TEMP[2].xyzz 4: TEX TEMP[0].xyz, IN[1].xyyy, SAMP[0], 2D 5: MAD TEMP[1].xyz, TEMP[0].xyzx, CONST[1].xyzx, TEMP[3].xyzx 6: MOV OUT[0], TEMP[1] 7: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[2].xyzz, const[3].xyzz; 3: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 4: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 5: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[3].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[2].xyzz, const[3].xyzz; 3: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 4: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 5: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[3].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[2].xyzz, const[3].xyzz; 3: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 4: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 5: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[3].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[2].xyzz, const[3].xyzz; 3: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 4: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 5: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[3].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[2].xyzz, const[3].xyzz; 3: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 4: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 5: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[3].xyzx; 6: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[2].xyzz, const[3].xyzz; 3: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 4: TEX temp[4], input[1].xyyy, 2D[0]; 5: MOV temp[0].xyz, temp[4]; 6: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[3].xyzx; 7: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: MAD temp[2].xyz, input[0].xyzz, const[2].xyzz, const[3].xyzz; 3: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 4: TEX temp[4], input[1].xyyy, 2D[0]; 5: MOV temp[0].xyz, temp[4]; 6: MAD temp[1].xyz, temp[0].xyzx, const[1].xyzx, temp[3].xyzx; 7: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[2].xyz_, const[3].xyz_; 3: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 4: TEX temp[4].xyz, input[1].xy__, 2D[0]; 5: MOV temp[0].xyz, temp[4].xyz_; 6: MAD temp[1].xyz, temp[0].xyz_, const[1].xyz_, temp[3].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[2].xyz_, const[3].xyz_; 3: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 4: TEX temp[4].xyz, input[1].xy__, 2D[0]; 5: MOV temp[0].xyz, temp[4].xyz_; 6: MAD temp[1].xyz, temp[0].xyz_, const[1].xyz_, temp[3].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[2].xyz_, const[3].xyz_; 3: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 4: TEX temp[4].xyz, input[1].xy__, 2D[0]; 5: MAD temp[1].xyz, temp[4].xyz_, const[1].xyz_, temp[3].xyz_; 6: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[2].xyz_, const[3].xyz_; 3: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 4: TEX temp[4].xyz, input[1].xy__, 2D[0]; 5: MAD temp[1].xyz, temp[4].xyz_, const[1].xyz_, temp[3].xyz_; 6: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[2].xyz_, const[3].xyz_; 3: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 4: TEX temp[4].xyz, input[1].xy__, 2D[0]; 5: MAD temp[1].xyz, temp[4].xyz_, const[1].xyz_, temp[3].xyz_; 6: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: MAD temp[2].xyz, input[0].xyz_, const[2].xyz_, const[3].xyz_; 3: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 4: TEX temp[4].xyz, input[1].xy__, 2D[0]; 5: MAD temp[1].xyz, temp[4].xyz_, const[1].xyz_, temp[3].xyz_; 6: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: src0.xyz = const[0], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: src0.xyz = input[0], src1.xyz = const[2], src2.xyz = const[3] MAD temp[2].xyz, src0.xyz, src1.xyz, src2.xyz 3: src0.xyz = temp[0], src1.xyz = temp[2] MAD temp[3].xyz, src0.xyz, src1.xyz, src0.000 4: TEX temp[4].xyz, input[1].xy__, 2D[0]; 5: src0.xyz = temp[4], src1.xyz = const[1], src2.xyz = temp[3] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[1].xy__, 2D[1]; 2: TEX temp[4].xyz, input[1].xy__, 2D[0]; 3: src0.xyz = input[0], src1.xyz = const[2], src2.xyz = const[3] MAD temp[2].xyz, src0.xyz, src1.xyz, src2.xyz 4: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = temp[2], src2.xyz = const[0] MAD temp[3].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src2.x, src0.0 5: src0.xyz = temp[4], src1.xyz = const[1], src2.xyz = temp[3] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[2], temp[1].xy__, 2D[1]; 2: TEX temp[3].xyz, temp[1].xy__, 2D[0]; 3: src0.xyz = temp[0], src1.xyz = const[2], src2.xyz = const[3] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 4: src0.xyz = temp[2], src0.w = temp[2], src1.xyz = temp[0], src2.xyz = const[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src2.x, src0.0 5: src0.xyz = temp[3], src1.xyz = const[1], src2.xyz = temp[1] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=15************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 3, tex_end: 1 (code_addr: 004200c0) TEX: TEX t2, t1, texture[1] (00008881) TEX t3, t1, texture[0] (000080c1) 0: xyz: t0 c2 c3 bias-> t0.xyz (03823880) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz c2.xyz c3.xyz op: 00020200 w: t0.x t0.x t0.x op: 00000000 1: xyz: t2 t0 c0 bias-> t1.xyz (03860002) w: t2 t0 t0 bias-> t0.w (00800002) xyz: t2.xyz t0.xyz 0.0 op: 00050200 w: t2.w c0.x 0.0 op: 00040309 2: xyz: t3 c1 t1 bias-> t0.xyz (03801843) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz c1.xyz t1.xyz op: 00020200 w: t3.x t3.x t3.x op: 00000000 3: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1] 0: MOV OUT[1], IN[1] 1: MUL TEMP[0], CONST[0], IN[2].xxxx 2: MAD TEMP[1], CONST[1], IN[2].yyyy, TEMP[0] 3: MAD TEMP[0], CONST[2], IN[2].zzzz, TEMP[1] 4: MAD OUT[2].xy, CONST[3].xyxx, IN[2].wwxx, TEMP[0].xyxx 5: MUL TEMP[0], CONST[4], IN[0].xxxx 6: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 7: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 8: MAD OUT[0], CONST[7], IN[0].wwww, TEMP[0] 9: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0], const[0], input[2].xxxx; 2: MAD temp[1], const[1], input[2].yyyy, temp[0]; 3: MAD temp[0], const[2], input[2].zzzz, temp[1]; 4: MAD output[2].xy, const[3].xyxx, input[2].wwxx, temp[0].xyxx; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[2], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[2]; 10: MOV output[3], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[0], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[0]; 10: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MUL temp[0].xy, const[0].xy__, input[2].xx__; 2: MAD temp[1].xy, const[1].xy__, input[2].yy__, temp[0].xy__; 3: MAD temp[0].xy, const[2].xy__, input[2].zz__, temp[1].xy__; 4: MAD output[2].xy, const[3].xy__, input[2].ww__, temp[0].xy__; 5: MUL temp[0], const[4], input[0].xxxx; 6: MAD temp[1], const[5], input[0].yyyy, temp[0]; 7: MAD temp[0], const[6], input[0].zzzz, temp[1]; 8: MAD temp[0], const[7], input[0].wwww, temp[0]; 9: MOV output[0], temp[0]; 10: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80041 reg: 2i swiz: X/ X/ U/ U src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 2: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92041 reg: 2i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 3: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4041 reg: 2i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 4: op: 0x00304204 dst: 2o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6041 reg: 2i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 5: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 6: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 7: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 8: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 9: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 10: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL IN[1], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..5] IMM FLT32 { -0.5000, 1.0000, 0.0100, -0.0100} IMM FLT32 { 0.0100, 0.0000, 20.0000, -0.0100} 0: RCP TEMP[0].x, IN[0].wwww 1: MUL TEMP[1].xy, CONST[2].xyyy, TEMP[0].xxxx 2: MAD TEMP[0].xy, IN[0].xyyy, TEMP[1].xyyy, CONST[1].xyyy 3: TEX TEMP[1].xyz, IN[1].xyyy, SAMP[1], 2D 4: ADD TEMP[2].xyz, TEMP[1].xyzz, IMM[0].xxxy 5: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[2].xyzz 6: RSQ TEMP[3].x, TEMP[1].xxxx 7: MUL TEMP[1].xy, TEMP[2].xyzz, TEMP[3].xxxx 8: MAD TEMP[2].xy, TEMP[1].xyyy, CONST[3].xyyy, TEMP[0].xyyy 9: ADD TEMP[1].xy, TEMP[2].xyyy, IMM[1].xxyz 10: TEX TEMP[3].xyz, TEMP[1].xyyy, SAMP[0], 2D 11: ADD TEMP[1].xy, TEMP[2].xyyy, IMM[1].xwyy 12: TEX TEMP[4].xyz, TEMP[1].xyyy, SAMP[0], 2D 13: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[3].xyzz 14: RSQ TEMP[3].x, TEMP[1].xxxx 15: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[1].xxxx 16: CMP TEMP[3].x, -TEMP[1].xxxx, TEMP[3].xxxx, IMM[1].yyyy 17: MUL TEMP[1].x, TEMP[3].xxxx, IMM[1].zzzz 18: MIN TEMP[3].x, IMM[0].yyyy, TEMP[1].xxxx 19: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[4].xyzz 20: RSQ TEMP[4].x, TEMP[1].xxxx 21: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx 22: CMP TEMP[4].x, -TEMP[1].xxxx, TEMP[4].xxxx, IMM[1].yyyy 23: MUL TEMP[1].x, TEMP[4].xxxx, IMM[1].zzzz 24: MIN TEMP[4].x, IMM[0].yyyy, TEMP[1].xxxx 25: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx 26: ADD TEMP[3].xy, TEMP[2].xyyy, IMM[1].wxxx 27: TEX TEMP[4].xyz, TEMP[3].xyyy, SAMP[0], 2D 28: DP3 TEMP[3].x, TEMP[4].xyzz, TEMP[4].xyzz 29: RSQ TEMP[4].x, TEMP[3].xxxx 30: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[3].xxxx 31: CMP TEMP[4].x, -TEMP[3].xxxx, TEMP[4].xxxx, IMM[1].yyyy 32: MUL TEMP[3].x, TEMP[4].xxxx, IMM[1].zzzz 33: MIN TEMP[4].x, IMM[0].yyyy, TEMP[3].xxxx 34: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[4].xxxx 35: ADD TEMP[1].xy, TEMP[2].xyyy, IMM[1].wwww 36: TEX TEMP[4].xyz, TEMP[1].xyyy, SAMP[0], 2D 37: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[4].xyzz 38: RSQ TEMP[4].x, TEMP[1].xxxx 39: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].xxxx 40: CMP TEMP[4].x, -TEMP[1].xxxx, TEMP[4].xxxx, IMM[1].yyyy 41: MUL TEMP[1].x, TEMP[4].xxxx, IMM[1].zzzz 42: MIN TEMP[4].x, IMM[0].yyyy, TEMP[1].xxxx 43: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx 44: MOV TEMP[3].w, IMM[0].yyyy 45: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[1].xxxx 46: MUL TEMP[5].xy, TEMP[0].xyyy, TEMP[4].xxxx 47: MAD TEMP[0].xy, TEMP[2].xyyy, TEMP[1].xxxx, TEMP[5].xyyy 48: TEX TEMP[1].xyz, TEMP[0].xyyy, SAMP[0], 2D 49: MOV TEMP[3].xyz, TEMP[1].xyzx 50: MUL OUT[0], TEMP[3], CONST[0] 51: END Fragment Program: before compilation # Radeon Compiler Program 0: RCP temp[0].x, input[0].wwww; 1: MUL temp[1].xy, const[2].xyyy, temp[0].xxxx; 2: MAD temp[0].xy, input[0].xyyy, temp[1].xyyy, const[1].xyyy; 3: TEX temp[1].xyz, input[1].xyyy, 2D[1]; 4: ADD temp[2].xyz, temp[1].xyzz, const[4].xxxy; 5: DP3 temp[1].x, temp[2].xyzz, temp[2].xyzz; 6: RSQ temp[3].x, temp[1].xxxx; 7: MUL temp[1].xy, temp[2].xyzz, temp[3].xxxx; 8: MAD temp[2].xy, temp[1].xyyy, const[3].xyyy, temp[0].xyyy; 9: ADD temp[1].xy, temp[2].xyyy, const[5].xxyz; 10: TEX temp[3].xyz, temp[1].xyyy, 2D[0]; 11: ADD temp[1].xy, temp[2].xyyy, const[5].xwyy; 12: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 13: DP3 temp[1].x, temp[3].xyzz, temp[3].xyzz; 14: RSQ temp[3].x, temp[1].xxxx; 15: MUL temp[3].x, temp[3].xxxx, temp[1].xxxx; 16: CMP temp[3].x, -temp[1].xxxx, temp[3].xxxx, const[5].yyyy; 17: MUL temp[1].x, temp[3].xxxx, const[5].zzzz; 18: MIN temp[3].x, const[4].yyyy, temp[1].xxxx; 19: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 20: RSQ temp[4].x, temp[1].xxxx; 21: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 22: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 23: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 24: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 25: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 26: ADD temp[3].xy, temp[2].xyyy, const[5].wxxx; 27: TEX temp[4].xyz, temp[3].xyyy, 2D[0]; 28: DP3 temp[3].x, temp[4].xyzz, temp[4].xyzz; 29: RSQ temp[4].x, temp[3].xxxx; 30: MUL temp[4].x, temp[4].xxxx, temp[3].xxxx; 31: CMP temp[4].x, -temp[3].xxxx, temp[4].xxxx, const[5].yyyy; 32: MUL temp[3].x, temp[4].xxxx, const[5].zzzz; 33: MIN temp[4].x, const[4].yyyy, temp[3].xxxx; 34: MUL temp[3].x, temp[1].xxxx, temp[4].xxxx; 35: ADD temp[1].xy, temp[2].xyyy, const[5].wwww; 36: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 37: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 38: RSQ temp[4].x, temp[1].xxxx; 39: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 40: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 41: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 42: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 43: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 44: MOV temp[3].w, const[4].yyyy; 45: ADD temp[4].x, const[4].yyyy, -temp[1].xxxx; 46: MUL temp[5].xy, temp[0].xyyy, temp[4].xxxx; 47: MAD temp[0].xy, temp[2].xyyy, temp[1].xxxx, temp[5].xyyy; 48: TEX temp[1].xyz, temp[0].xyyy, 2D[0]; 49: MOV temp[3].xyz, temp[1].xyzx; 50: MUL output[0], temp[3], const[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: RCP temp[0].x, input[0].wwww; 1: MUL temp[1].xy, const[2].xyyy, temp[0].xxxx; 2: MAD temp[0].xy, input[0].xyyy, temp[1].xyyy, const[1].xyyy; 3: TEX temp[1].xyz, input[1].xyyy, 2D[1]; 4: ADD temp[2].xyz, temp[1].xyzz, const[4].xxxy; 5: DP3 temp[1].x, temp[2].xyzz, temp[2].xyzz; 6: RSQ temp[3].x, temp[1].xxxx; 7: MUL temp[1].xy, temp[2].xyzz, temp[3].xxxx; 8: MAD temp[2].xy, temp[1].xyyy, const[3].xyyy, temp[0].xyyy; 9: ADD temp[1].xy, temp[2].xyyy, const[5].xxyz; 10: TEX temp[3].xyz, temp[1].xyyy, 2D[0]; 11: ADD temp[1].xy, temp[2].xyyy, const[5].xwyy; 12: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 13: DP3 temp[1].x, temp[3].xyzz, temp[3].xyzz; 14: RSQ temp[3].x, temp[1].xxxx; 15: MUL temp[3].x, temp[3].xxxx, temp[1].xxxx; 16: CMP temp[3].x, -temp[1].xxxx, temp[3].xxxx, const[5].yyyy; 17: MUL temp[1].x, temp[3].xxxx, const[5].zzzz; 18: MIN temp[3].x, const[4].yyyy, temp[1].xxxx; 19: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 20: RSQ temp[4].x, temp[1].xxxx; 21: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 22: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 23: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 24: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 25: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 26: ADD temp[3].xy, temp[2].xyyy, const[5].wxxx; 27: TEX temp[4].xyz, temp[3].xyyy, 2D[0]; 28: DP3 temp[3].x, temp[4].xyzz, temp[4].xyzz; 29: RSQ temp[4].x, temp[3].xxxx; 30: MUL temp[4].x, temp[4].xxxx, temp[3].xxxx; 31: CMP temp[4].x, -temp[3].xxxx, temp[4].xxxx, const[5].yyyy; 32: MUL temp[3].x, temp[4].xxxx, const[5].zzzz; 33: MIN temp[4].x, const[4].yyyy, temp[3].xxxx; 34: MUL temp[3].x, temp[1].xxxx, temp[4].xxxx; 35: ADD temp[1].xy, temp[2].xyyy, const[5].wwww; 36: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 37: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 38: RSQ temp[4].x, temp[1].xxxx; 39: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 40: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 41: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 42: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 43: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 44: MOV temp[3].w, const[4].yyyy; 45: ADD temp[4].x, const[4].yyyy, -temp[1].xxxx; 46: MUL temp[5].xy, temp[0].xyyy, temp[4].xxxx; 47: MAD temp[0].xy, temp[2].xyyy, temp[1].xxxx, temp[5].xyyy; 48: TEX temp[1].xyz, temp[0].xyyy, 2D[0]; 49: MOV temp[3].xyz, temp[1].xyzx; 50: MUL output[0], temp[3], const[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: RCP temp[0].x, input[0].wwww; 1: MUL temp[1].xy, const[2].xyyy, temp[0].xxxx; 2: MAD temp[0].xy, input[0].xyyy, temp[1].xyyy, const[1].xyyy; 3: TEX temp[1].xyz, input[1].xyyy, 2D[1]; 4: ADD temp[2].xyz, temp[1].xyzz, const[4].xxxy; 5: DP3 temp[1].x, temp[2].xyzz, temp[2].xyzz; 6: RSQ temp[3].x, temp[1].xxxx; 7: MUL temp[1].xy, temp[2].xyzz, temp[3].xxxx; 8: MAD temp[2].xy, temp[1].xyyy, const[3].xyyy, temp[0].xyyy; 9: ADD temp[1].xy, temp[2].xyyy, const[5].xxyz; 10: TEX temp[3].xyz, temp[1].xyyy, 2D[0]; 11: ADD temp[1].xy, temp[2].xyyy, const[5].xwyy; 12: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 13: DP3 temp[1].x, temp[3].xyzz, temp[3].xyzz; 14: RSQ temp[3].x, temp[1].xxxx; 15: MUL temp[3].x, temp[3].xxxx, temp[1].xxxx; 16: CMP temp[3].x, -temp[1].xxxx, temp[3].xxxx, const[5].yyyy; 17: MUL temp[1].x, temp[3].xxxx, const[5].zzzz; 18: MIN temp[3].x, const[4].yyyy, temp[1].xxxx; 19: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 20: RSQ temp[4].x, temp[1].xxxx; 21: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 22: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 23: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 24: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 25: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 26: ADD temp[3].xy, temp[2].xyyy, const[5].wxxx; 27: TEX temp[4].xyz, temp[3].xyyy, 2D[0]; 28: DP3 temp[3].x, temp[4].xyzz, temp[4].xyzz; 29: RSQ temp[4].x, temp[3].xxxx; 30: MUL temp[4].x, temp[4].xxxx, temp[3].xxxx; 31: CMP temp[4].x, -temp[3].xxxx, temp[4].xxxx, const[5].yyyy; 32: MUL temp[3].x, temp[4].xxxx, const[5].zzzz; 33: MIN temp[4].x, const[4].yyyy, temp[3].xxxx; 34: MUL temp[3].x, temp[1].xxxx, temp[4].xxxx; 35: ADD temp[1].xy, temp[2].xyyy, const[5].wwww; 36: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 37: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 38: RSQ temp[4].x, temp[1].xxxx; 39: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 40: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 41: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 42: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 43: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 44: MOV temp[3].w, const[4].yyyy; 45: ADD temp[4].x, const[4].yyyy, -temp[1].xxxx; 46: MUL temp[5].xy, temp[0].xyyy, temp[4].xxxx; 47: MAD temp[0].xy, temp[2].xyyy, temp[1].xxxx, temp[5].xyyy; 48: TEX temp[1].xyz, temp[0].xyyy, 2D[0]; 49: MOV temp[3].xyz, temp[1].xyzx; 50: MUL output[0], temp[3], const[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: RCP temp[0].x, input[0].wwww; 1: MUL temp[1].xy, const[2].xyyy, temp[0].xxxx; 2: MAD temp[0].xy, input[0].xyyy, temp[1].xyyy, const[1].xyyy; 3: TEX temp[1].xyz, input[1].xyyy, 2D[1]; 4: ADD temp[2].xyz, temp[1].xyzz, const[4].xxxy; 5: DP3 temp[1].x, temp[2].xyzz, temp[2].xyzz; 6: RSQ temp[3].x, temp[1].xxxx; 7: MUL temp[1].xy, temp[2].xyzz, temp[3].xxxx; 8: MAD temp[2].xy, temp[1].xyyy, const[3].xyyy, temp[0].xyyy; 9: ADD temp[1].xy, temp[2].xyyy, const[5].xxyz; 10: TEX temp[3].xyz, temp[1].xyyy, 2D[0]; 11: ADD temp[1].xy, temp[2].xyyy, const[5].xwyy; 12: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 13: DP3 temp[1].x, temp[3].xyzz, temp[3].xyzz; 14: RSQ temp[3].x, temp[1].xxxx; 15: MUL temp[3].x, temp[3].xxxx, temp[1].xxxx; 16: CMP temp[3].x, -temp[1].xxxx, temp[3].xxxx, const[5].yyyy; 17: MUL temp[1].x, temp[3].xxxx, const[5].zzzz; 18: MIN temp[3].x, const[4].yyyy, temp[1].xxxx; 19: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 20: RSQ temp[4].x, temp[1].xxxx; 21: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 22: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 23: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 24: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 25: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 26: ADD temp[3].xy, temp[2].xyyy, const[5].wxxx; 27: TEX temp[4].xyz, temp[3].xyyy, 2D[0]; 28: DP3 temp[3].x, temp[4].xyzz, temp[4].xyzz; 29: RSQ temp[4].x, temp[3].xxxx; 30: MUL temp[4].x, temp[4].xxxx, temp[3].xxxx; 31: CMP temp[4].x, -temp[3].xxxx, temp[4].xxxx, const[5].yyyy; 32: MUL temp[3].x, temp[4].xxxx, const[5].zzzz; 33: MIN temp[4].x, const[4].yyyy, temp[3].xxxx; 34: MUL temp[3].x, temp[1].xxxx, temp[4].xxxx; 35: ADD temp[1].xy, temp[2].xyyy, const[5].wwww; 36: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 37: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 38: RSQ temp[4].x, temp[1].xxxx; 39: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 40: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 41: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 42: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 43: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 44: MOV temp[3].w, const[4].yyyy; 45: ADD temp[4].x, const[4].yyyy, -temp[1].xxxx; 46: MUL temp[5].xy, temp[0].xyyy, temp[4].xxxx; 47: MAD temp[0].xy, temp[2].xyyy, temp[1].xxxx, temp[5].xyyy; 48: TEX temp[1].xyz, temp[0].xyyy, 2D[0]; 49: MOV temp[3].xyz, temp[1].xyzx; 50: MUL output[0], temp[3], const[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: RCP temp[0].x, input[0].wwww; 1: MUL temp[1].xy, const[2].xyyy, temp[0].xxxx; 2: MAD temp[0].xy, input[0].xyyy, temp[1].xyyy, const[1].xyyy; 3: TEX temp[1].xyz, input[1].xyyy, 2D[1]; 4: ADD temp[2].xyz, temp[1].xyzz, const[4].xxxy; 5: DP3 temp[1].x, temp[2].xyzz, temp[2].xyzz; 6: RSQ temp[3].x, temp[1].xxxx; 7: MUL temp[1].xy, temp[2].xyzz, temp[3].xxxx; 8: MAD temp[2].xy, temp[1].xyyy, const[3].xyyy, temp[0].xyyy; 9: ADD temp[1].xy, temp[2].xyyy, const[5].xxyz; 10: TEX temp[3].xyz, temp[1].xyyy, 2D[0]; 11: ADD temp[1].xy, temp[2].xyyy, const[5].xwyy; 12: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 13: DP3 temp[1].x, temp[3].xyzz, temp[3].xyzz; 14: RSQ temp[3].x, temp[1].xxxx; 15: MUL temp[3].x, temp[3].xxxx, temp[1].xxxx; 16: CMP temp[3].x, -temp[1].xxxx, temp[3].xxxx, const[5].yyyy; 17: MUL temp[1].x, temp[3].xxxx, const[5].zzzz; 18: MIN temp[3].x, const[4].yyyy, temp[1].xxxx; 19: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 20: RSQ temp[4].x, temp[1].xxxx; 21: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 22: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 23: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 24: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 25: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 26: ADD temp[3].xy, temp[2].xyyy, const[5].wxxx; 27: TEX temp[4].xyz, temp[3].xyyy, 2D[0]; 28: DP3 temp[3].x, temp[4].xyzz, temp[4].xyzz; 29: RSQ temp[4].x, temp[3].xxxx; 30: MUL temp[4].x, temp[4].xxxx, temp[3].xxxx; 31: CMP temp[4].x, -temp[3].xxxx, temp[4].xxxx, const[5].yyyy; 32: MUL temp[3].x, temp[4].xxxx, const[5].zzzz; 33: MIN temp[4].x, const[4].yyyy, temp[3].xxxx; 34: MUL temp[3].x, temp[1].xxxx, temp[4].xxxx; 35: ADD temp[1].xy, temp[2].xyyy, const[5].wwww; 36: TEX temp[4].xyz, temp[1].xyyy, 2D[0]; 37: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 38: RSQ temp[4].x, temp[1].xxxx; 39: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 40: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 41: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 42: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 43: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 44: MOV temp[3].w, const[4].yyyy; 45: ADD temp[4].x, const[4].yyyy, -temp[1].xxxx; 46: MUL temp[5].xy, temp[0].xyyy, temp[4].xxxx; 47: MAD temp[0].xy, temp[2].xyyy, temp[1].xxxx, temp[5].xyyy; 48: TEX temp[1].xyz, temp[0].xyyy, 2D[0]; 49: MOV temp[3].xyz, temp[1].xyzx; 50: MUL output[0], temp[3], const[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: RCP temp[0].x, input[0].wwww; 1: MUL temp[1].xy, const[2].xyyy, temp[0].xxxx; 2: MAD temp[0].xy, input[0].xyyy, temp[1].xyyy, const[1].xyyy; 3: TEX temp[6], input[1].xyyy, 2D[1]; 4: MOV temp[1].xyz, temp[6]; 5: ADD temp[2].xyz, temp[1].xyzz, const[4].xxxy; 6: DP3 temp[1].x, temp[2].xyzz, temp[2].xyzz; 7: RSQ temp[3].x, temp[1].xxxx; 8: MUL temp[1].xy, temp[2].xyzz, temp[3].xxxx; 9: MAD temp[2].xy, temp[1].xyyy, const[3].xyyy, temp[0].xyyy; 10: ADD temp[1].xy, temp[2].xyyy, const[5].xxyz; 11: TEX temp[7], temp[1].xyyy, 2D[0]; 12: MOV temp[3].xyz, temp[7]; 13: ADD temp[1].xy, temp[2].xyyy, const[5].xwyy; 14: TEX temp[8], temp[1].xyyy, 2D[0]; 15: MOV temp[4].xyz, temp[8]; 16: DP3 temp[1].x, temp[3].xyzz, temp[3].xyzz; 17: RSQ temp[3].x, temp[1].xxxx; 18: MUL temp[3].x, temp[3].xxxx, temp[1].xxxx; 19: CMP temp[3].x, -temp[1].xxxx, temp[3].xxxx, const[5].yyyy; 20: MUL temp[1].x, temp[3].xxxx, const[5].zzzz; 21: MIN temp[3].x, const[4].yyyy, temp[1].xxxx; 22: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 23: RSQ temp[4].x, temp[1].xxxx; 24: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 25: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 26: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 27: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 28: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 29: ADD temp[3].xy, temp[2].xyyy, const[5].wxxx; 30: TEX temp[9], temp[3].xyyy, 2D[0]; 31: MOV temp[4].xyz, temp[9]; 32: DP3 temp[3].x, temp[4].xyzz, temp[4].xyzz; 33: RSQ temp[4].x, temp[3].xxxx; 34: MUL temp[4].x, temp[4].xxxx, temp[3].xxxx; 35: CMP temp[4].x, -temp[3].xxxx, temp[4].xxxx, const[5].yyyy; 36: MUL temp[3].x, temp[4].xxxx, const[5].zzzz; 37: MIN temp[4].x, const[4].yyyy, temp[3].xxxx; 38: MUL temp[3].x, temp[1].xxxx, temp[4].xxxx; 39: ADD temp[1].xy, temp[2].xyyy, const[5].wwww; 40: TEX temp[10], temp[1].xyyy, 2D[0]; 41: MOV temp[4].xyz, temp[10]; 42: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 43: RSQ temp[4].x, temp[1].xxxx; 44: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 45: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 46: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 47: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 48: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 49: MOV temp[3].w, const[4].yyyy; 50: ADD temp[4].x, const[4].yyyy, -temp[1].xxxx; 51: MUL temp[5].xy, temp[0].xyyy, temp[4].xxxx; 52: MAD temp[0].xy, temp[2].xyyy, temp[1].xxxx, temp[5].xyyy; 53: TEX temp[11], temp[0].xyyy, 2D[0]; 54: MOV temp[1].xyz, temp[11]; 55: MOV temp[3].xyz, temp[1].xyzx; 56: MUL output[0], temp[3], const[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: RCP temp[0].x, input[0].wwww; 1: MUL temp[1].xy, const[2].xyyy, temp[0].xxxx; 2: MAD temp[0].xy, input[0].xyyy, temp[1].xyyy, const[1].xyyy; 3: TEX temp[6], input[1].xyyy, 2D[1]; 4: MOV temp[1].xyz, temp[6]; 5: ADD temp[2].xyz, temp[1].xyzz, const[4].xxxy; 6: DP3 temp[1].x, temp[2].xyzz, temp[2].xyzz; 7: RSQ temp[3].x, |temp[1].xxxx|; 8: MUL temp[1].xy, temp[2].xyzz, temp[3].xxxx; 9: MAD temp[2].xy, temp[1].xyyy, const[3].xyyy, temp[0].xyyy; 10: ADD temp[1].xy, temp[2].xyyy, const[5].xxyz; 11: TEX temp[7], temp[1].xyyy, 2D[0]; 12: MOV temp[3].xyz, temp[7]; 13: ADD temp[1].xy, temp[2].xyyy, const[5].xwyy; 14: TEX temp[8], temp[1].xyyy, 2D[0]; 15: MOV temp[4].xyz, temp[8]; 16: DP3 temp[1].x, temp[3].xyzz, temp[3].xyzz; 17: RSQ temp[3].x, |temp[1].xxxx|; 18: MUL temp[3].x, temp[3].xxxx, temp[1].xxxx; 19: CMP temp[3].x, -temp[1].xxxx, temp[3].xxxx, const[5].yyyy; 20: MUL temp[1].x, temp[3].xxxx, const[5].zzzz; 21: MIN temp[3].x, const[4].yyyy, temp[1].xxxx; 22: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 23: RSQ temp[4].x, |temp[1].xxxx|; 24: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 25: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 26: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 27: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 28: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 29: ADD temp[3].xy, temp[2].xyyy, const[5].wxxx; 30: TEX temp[9], temp[3].xyyy, 2D[0]; 31: MOV temp[4].xyz, temp[9]; 32: DP3 temp[3].x, temp[4].xyzz, temp[4].xyzz; 33: RSQ temp[4].x, |temp[3].xxxx|; 34: MUL temp[4].x, temp[4].xxxx, temp[3].xxxx; 35: CMP temp[4].x, -temp[3].xxxx, temp[4].xxxx, const[5].yyyy; 36: MUL temp[3].x, temp[4].xxxx, const[5].zzzz; 37: MIN temp[4].x, const[4].yyyy, temp[3].xxxx; 38: MUL temp[3].x, temp[1].xxxx, temp[4].xxxx; 39: ADD temp[1].xy, temp[2].xyyy, const[5].wwww; 40: TEX temp[10], temp[1].xyyy, 2D[0]; 41: MOV temp[4].xyz, temp[10]; 42: DP3 temp[1].x, temp[4].xyzz, temp[4].xyzz; 43: RSQ temp[4].x, |temp[1].xxxx|; 44: MUL temp[4].x, temp[4].xxxx, temp[1].xxxx; 45: CMP temp[4].x, -temp[1].xxxx, temp[4].xxxx, const[5].yyyy; 46: MUL temp[1].x, temp[4].xxxx, const[5].zzzz; 47: MIN temp[4].x, const[4].yyyy, temp[1].xxxx; 48: MUL temp[1].x, temp[3].xxxx, temp[4].xxxx; 49: MOV temp[3].w, const[4].yyyy; 50: ADD temp[4].x, const[4].yyyy, -temp[1].xxxx; 51: MUL temp[5].xy, temp[0].xyyy, temp[4].xxxx; 52: MAD temp[0].xy, temp[2].xyyy, temp[1].xxxx, temp[5].xyyy; 53: TEX temp[11], temp[0].xyyy, 2D[0]; 54: MOV temp[1].xyz, temp[11]; 55: MOV temp[3].xyz, temp[1].xyzx; 56: MUL output[0], temp[3], const[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: RCP temp[0].x, input[0].w___; 1: MUL temp[1].xy, const[2].xy__, temp[0].xx__; 2: MAD temp[0].xy, input[0].xy__, temp[1].xy__, const[1].xy__; 3: TEX temp[6].xyz, input[1].xy__, 2D[1]; 4: MOV temp[1].xyz, temp[6].xyz_; 5: ADD temp[2].xyz, temp[1].xyz_, const[4].xxx_; 6: DP3 temp[1].x, temp[2].xyz_, temp[2].xyz_; 7: RSQ temp[3].x, |temp[1].x___|; 8: MUL temp[1].xy, temp[2].xy__, temp[3].xx__; 9: MAD temp[2].xy, temp[1].xy__, const[3].xy__, temp[0].xy__; 10: ADD temp[1].xy, temp[2].xy__, const[5].xx__; 11: TEX temp[7].xyz, temp[1].xy__, 2D[0]; 12: MOV temp[3].xyz, temp[7].xyz_; 13: ADD temp[1].xy, temp[2].xy__, const[5].xw__; 14: TEX temp[8].xyz, temp[1].xy__, 2D[0]; 15: MOV temp[4].xyz, temp[8].xyz_; 16: DP3 temp[1].x, temp[3].xyz_, temp[3].xyz_; 17: RSQ temp[3].x, |temp[1].x___|; 18: MUL temp[3].x, temp[3].x___, temp[1].x___; 19: CMP temp[3].x, -temp[1].x___, temp[3].x___, const[5].y___; 20: MUL temp[1].x, temp[3].x___, const[5].z___; 21: MIN temp[3].x, const[4].y___, temp[1].x___; 22: DP3 temp[1].x, temp[4].xyz_, temp[4].xyz_; 23: RSQ temp[4].x, |temp[1].x___|; 24: MUL temp[4].x, temp[4].x___, temp[1].x___; 25: CMP temp[4].x, -temp[1].x___, temp[4].x___, const[5].y___; 26: MUL temp[1].x, temp[4].x___, const[5].z___; 27: MIN temp[4].x, const[4].y___, temp[1].x___; 28: MUL temp[1].x, temp[3].x___, temp[4].x___; 29: ADD temp[3].xy, temp[2].xy__, const[5].wx__; 30: TEX temp[9].xyz, temp[3].xy__, 2D[0]; 31: MOV temp[4].xyz, temp[9].xyz_; 32: DP3 temp[3].x, temp[4].xyz_, temp[4].xyz_; 33: RSQ temp[4].x, |temp[3].x___|; 34: MUL temp[4].x, temp[4].x___, temp[3].x___; 35: CMP temp[4].x, -temp[3].x___, temp[4].x___, const[5].y___; 36: MUL temp[3].x, temp[4].x___, const[5].z___; 37: MIN temp[4].x, const[4].y___, temp[3].x___; 38: MUL temp[3].x, temp[1].x___, temp[4].x___; 39: ADD temp[1].xy, temp[2].xy__, const[5].ww__; 40: TEX temp[10].xyz, temp[1].xy__, 2D[0]; 41: MOV temp[4].xyz, temp[10].xyz_; 42: DP3 temp[1].x, temp[4].xyz_, temp[4].xyz_; 43: RSQ temp[4].x, |temp[1].x___|; 44: MUL temp[4].x, temp[4].x___, temp[1].x___; 45: CMP temp[4].x, -temp[1].x___, temp[4].x___, const[5].y___; 46: MUL temp[1].x, temp[4].x___, const[5].z___; 47: MIN temp[4].x, const[4].y___, temp[1].x___; 48: MUL temp[1].x, temp[3].x___, temp[4].x___; 49: MOV temp[3].w, const[4].___y; 50: ADD temp[4].x, const[4].y___, -temp[1].x___; 51: MUL temp[5].xy, temp[0].xy__, temp[4].xx__; 52: MAD temp[0].xy, temp[2].xy__, temp[1].xx__, temp[5].xy__; 53: TEX temp[11].xyz, temp[0].xy__, 2D[0]; 54: MOV temp[1].xyz, temp[11].xyz_; 55: MOV temp[3].xyz, temp[1].xyz_; 56: MUL output[0], temp[3], const[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: RCP temp[0].x, input[0].w___; 1: MUL temp[1].xy, const[2].xy__, temp[0].xx__; 2: MAD temp[0].xy, input[0].xy__, temp[1].xy__, const[1].xy__; 3: TEX temp[6].xyz, input[1].xy__, 2D[1]; 4: MOV temp[1].xyz, temp[6].xyz_; 5: ADD temp[2].xyz, temp[1].xyz_, const[4].xxx_; 6: DP3 temp[1].x, temp[2].xyz_, temp[2].xyz_; 7: RSQ temp[3].x, |temp[1].x___|; 8: MUL temp[1].xy, temp[2].xy__, temp[3].xx__; 9: MAD temp[2].xy, temp[1].xy__, const[3].xy__, temp[0].xy__; 10: ADD temp[1].xy, temp[2].xy__, const[5].xx__; 11: TEX temp[7].xyz, temp[1].xy__, 2D[0]; 12: MOV temp[3].xyz, temp[7].xyz_; 13: ADD temp[1].xy, temp[2].xy__, const[5].xw__; 14: TEX temp[8].xyz, temp[1].xy__, 2D[0]; 15: MOV temp[4].xyz, temp[8].xyz_; 16: DP3 temp[1].x, temp[3].xyz_, temp[3].xyz_; 17: RSQ temp[3].x, |temp[1].x___|; 18: MUL temp[3].x, temp[3].x___, temp[1].x___; 19: CMP temp[3].x, -temp[1].x___, temp[3].x___, const[5].y___; 20: MUL temp[1].x, temp[3].x___, const[5].z___; 21: MIN temp[3].x, const[4].y___, temp[1].x___; 22: DP3 temp[1].x, temp[4].xyz_, temp[4].xyz_; 23: RSQ temp[4].x, |temp[1].x___|; 24: MUL temp[4].x, temp[4].x___, temp[1].x___; 25: CMP temp[4].x, -temp[1].x___, temp[4].x___, const[5].y___; 26: MUL temp[1].x, temp[4].x___, const[5].z___; 27: MIN temp[4].x, const[4].y___, temp[1].x___; 28: MUL temp[1].x, temp[3].x___, temp[4].x___; 29: ADD temp[3].xy, temp[2].xy__, const[5].wx__; 30: TEX temp[9].xyz, temp[3].xy__, 2D[0]; 31: MOV temp[4].xyz, temp[9].xyz_; 32: DP3 temp[3].x, temp[4].xyz_, temp[4].xyz_; 33: RSQ temp[4].x, |temp[3].x___|; 34: MUL temp[4].x, temp[4].x___, temp[3].x___; 35: CMP temp[4].x, -temp[3].x___, temp[4].x___, const[5].y___; 36: MUL temp[3].x, temp[4].x___, const[5].z___; 37: MIN temp[4].x, const[4].y___, temp[3].x___; 38: MUL temp[3].x, temp[1].x___, temp[4].x___; 39: ADD temp[1].xy, temp[2].xy__, const[5].ww__; 40: TEX temp[10].xyz, temp[1].xy__, 2D[0]; 41: MOV temp[4].xyz, temp[10].xyz_; 42: DP3 temp[1].x, temp[4].xyz_, temp[4].xyz_; 43: RSQ temp[4].x, |temp[1].x___|; 44: MUL temp[4].x, temp[4].x___, temp[1].x___; 45: CMP temp[4].x, -temp[1].x___, temp[4].x___, const[5].y___; 46: MUL temp[1].x, temp[4].x___, const[5].z___; 47: MIN temp[4].x, const[4].y___, temp[1].x___; 48: MUL temp[1].x, temp[3].x___, temp[4].x___; 49: MOV temp[3].w, const[4].___y; 50: ADD temp[4].x, const[4].y___, -temp[1].x___; 51: MUL temp[5].xy, temp[0].xy__, temp[4].xx__; 52: MAD temp[0].xy, temp[2].xy__, temp[1].xx__, temp[5].xy__; 53: TEX temp[11].xyz, temp[0].xy__, 2D[0]; 54: MOV temp[1].xyz, temp[11].xyz_; 55: MOV temp[3].xyz, temp[1].xyz_; 56: MUL output[0], temp[3], const[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: RCP temp[0].x, input[0].w___; 1: MUL temp[1].xy, const[2].xy__, temp[0].xx__; 2: MAD temp[0].xy, input[0].xy__, temp[1].xy__, const[1].xy__; 3: TEX temp[6].xyz, input[1].xy__, 2D[1]; 4: ADD temp[2].xyz, temp[6].xyz_, none.-H-H-H_; 5: DP3 temp[1].x, temp[2].xyz_, temp[2].xyz_; 6: RSQ temp[3].x, |temp[1].x___|; 7: MUL temp[1].xy, temp[2].xy__, temp[3].xx__; 8: MAD temp[2].xy, temp[1].xy__, const[3].xy__, temp[0].xy__; 9: ADD temp[1].xy, temp[2].xy__, const[5].xx__; 10: TEX temp[7].xyz, temp[1].xy__, 2D[0]; 11: ADD temp[1].xy, temp[2].xy__, const[5].xw__; 12: TEX temp[8].xyz, temp[1].xy__, 2D[0]; 13: DP3 temp[1].x, temp[7].xyz_, temp[7].xyz_; 14: RSQ temp[3].x, |temp[1].x___|; 15: MUL temp[3].x, temp[3].x___, temp[1].x___; 16: CMP temp[3].x, -temp[1].x___, temp[3].x___, none.0___; 17: MUL temp[1].x, temp[3].x___, const[5].z___; 18: MIN temp[3].x, none.1___, temp[1].x___; 19: DP3 temp[1].x, temp[8].xyz_, temp[8].xyz_; 20: RSQ temp[4].x, |temp[1].x___|; 21: MUL temp[4].x, temp[4].x___, temp[1].x___; 22: CMP temp[4].x, -temp[1].x___, temp[4].x___, none.0___; 23: MUL temp[1].x, temp[4].x___, const[5].z___; 24: MIN temp[4].x, none.1___, temp[1].x___; 25: MUL temp[1].x, temp[3].x___, temp[4].x___; 26: ADD temp[3].xy, temp[2].xy__, const[5].wx__; 27: TEX temp[9].xyz, temp[3].xy__, 2D[0]; 28: DP3 temp[3].x, temp[9].xyz_, temp[9].xyz_; 29: RSQ temp[4].x, |temp[3].x___|; 30: MUL temp[4].x, temp[4].x___, temp[3].x___; 31: CMP temp[4].x, -temp[3].x___, temp[4].x___, none.0___; 32: MUL temp[3].x, temp[4].x___, const[5].z___; 33: MIN temp[4].x, none.1___, temp[3].x___; 34: MUL temp[3].x, temp[1].x___, temp[4].x___; 35: ADD temp[1].xy, temp[2].xy__, const[5].ww__; 36: TEX temp[10].xyz, temp[1].xy__, 2D[0]; 37: DP3 temp[1].x, temp[10].xyz_, temp[10].xyz_; 38: RSQ temp[4].x, |temp[1].x___|; 39: MUL temp[4].x, temp[4].x___, temp[1].x___; 40: CMP temp[4].x, -temp[1].x___, temp[4].x___, none.0___; 41: MUL temp[1].x, temp[4].x___, const[5].z___; 42: MIN temp[4].x, none.1___, temp[1].x___; 43: MUL temp[1].x, temp[3].x___, temp[4].x___; 44: MOV temp[3].w, none.___1; 45: MUL temp[5].xy, temp[0].xy__, (1 - temp[1]).xx__; 46: MAD temp[0].xy, temp[2].xy__, temp[1].xx__, temp[5].xy__; 47: TEX temp[11].xyz, temp[0].xy__, 2D[0]; 48: MOV temp[3].xyz, temp[11].xyz_; 49: MUL output[0], temp[3], const[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: RCP temp[0].x, input[0].w___; 1: MUL temp[1].xy, const[2].xy__, temp[0].xx__; 2: MAD temp[0].xy, input[0].xy__, temp[1].xy__, const[1].xy__; 3: TEX temp[6].xyz, input[1].xy__, 2D[1]; 4: ADD temp[2].xyz, temp[6].xyz_, none.-H-H-H_; 5: DP3 temp[1].x, temp[2].xyz_, temp[2].xyz_; 6: RSQ temp[3].x, |temp[1].x___|; 7: MUL temp[1].xy, temp[2].xy__, temp[3].xx__; 8: MAD temp[2].xy, temp[1].xy__, const[3].xy__, temp[0].xy__; 9: ADD temp[1].xy, temp[2].xy__, const[5].xx__; 10: TEX temp[7].xyz, temp[1].xy__, 2D[0]; 11: MOV temp[12].x, const[5].x___; 12: MOV temp[12].y, const[5]._w__; 13: ADD temp[1].xy, temp[2].xy__, temp[12].xy__; 14: TEX temp[8].xyz, temp[1].xy__, 2D[0]; 15: DP3 temp[1].x, temp[7].xyz_, temp[7].xyz_; 16: RSQ temp[3].x, |temp[1].x___|; 17: MUL temp[3].x, temp[3].x___, temp[1].x___; 18: CMP temp[3].x, -temp[1].x___, temp[3].x___, none.0___; 19: MUL temp[1].x, temp[3].x___, const[5].z___; 20: MIN temp[3].x, none.1___, temp[1].x___; 21: DP3 temp[1].x, temp[8].xyz_, temp[8].xyz_; 22: RSQ temp[4].x, |temp[1].x___|; 23: MUL temp[4].x, temp[4].x___, temp[1].x___; 24: CMP temp[4].x, -temp[1].x___, temp[4].x___, none.0___; 25: MUL temp[1].x, temp[4].x___, const[5].z___; 26: MIN temp[4].x, none.1___, temp[1].x___; 27: MUL temp[1].x, temp[3].x___, temp[4].x___; 28: MOV temp[13].y, const[5]._x__; 29: MOV temp[13].x, const[5].w___; 30: ADD temp[3].xy, temp[2].xy__, temp[13].xy__; 31: TEX temp[9].xyz, temp[3].xy__, 2D[0]; 32: DP3 temp[3].x, temp[9].xyz_, temp[9].xyz_; 33: RSQ temp[4].x, |temp[3].x___|; 34: MUL temp[4].x, temp[4].x___, temp[3].x___; 35: CMP temp[4].x, -temp[3].x___, temp[4].x___, none.0___; 36: MUL temp[3].x, temp[4].x___, const[5].z___; 37: MIN temp[4].x, none.1___, temp[3].x___; 38: MUL temp[3].x, temp[1].x___, temp[4].x___; 39: ADD temp[1].xy, temp[2].xy__, const[5].ww__; 40: TEX temp[10].xyz, temp[1].xy__, 2D[0]; 41: DP3 temp[1].x, temp[10].xyz_, temp[10].xyz_; 42: RSQ temp[4].x, |temp[1].x___|; 43: MUL temp[4].x, temp[4].x___, temp[1].x___; 44: CMP temp[4].x, -temp[1].x___, temp[4].x___, none.0___; 45: MUL temp[1].x, temp[4].x___, const[5].z___; 46: MIN temp[4].x, none.1___, temp[1].x___; 47: MUL temp[1].x, temp[3].x___, temp[4].x___; 48: MOV temp[3].w, none.___1; 49: MUL temp[5].xy, temp[0].xy__, (1 - temp[1]).xx__; 50: MAD temp[0].xy, temp[2].xy__, temp[1].xx__, temp[5].xy__; 51: TEX temp[11].xyz, temp[0].xy__, 2D[0]; 52: MOV temp[3].xyz, temp[11].xyz_; 53: MUL output[0], temp[3], const[0]; CONST[4] = { 0.0100 0.0000 20.0000 -0.0100 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: RCP temp[0].x, input[0].w___; 1: MUL temp[1].xy, const[2].xy__, temp[0].xx__; 2: MAD temp[0].xy, input[0].xy__, temp[1].xy__, const[1].xy__; 3: TEX temp[6].xyz, input[1].xy__, 2D[1]; 4: ADD temp[2].xyz, temp[6].xyz_, none.-H-H-H_; 5: DP3 temp[1].x, temp[2].xyz_, temp[2].xyz_; 6: RSQ temp[3].x, |temp[1].x___|; 7: MUL temp[1].xy, temp[2].xy__, temp[3].xx__; 8: MAD temp[2].xy, temp[1].xy__, const[3].xy__, temp[0].xy__; 9: ADD temp[1].xy, temp[2].xy__, const[4].xx__; 10: TEX temp[7].xyz, temp[1].xy__, 2D[0]; 11: MOV temp[12].x, const[4].x___; 12: MOV temp[12].y, const[4]._w__; 13: ADD temp[1].xy, temp[2].xy__, temp[12].xy__; 14: TEX temp[8].xyz, temp[1].xy__, 2D[0]; 15: DP3 temp[1].x, temp[7].xyz_, temp[7].xyz_; 16: RSQ temp[3].x, |temp[1].x___|; 17: MUL temp[3].x, temp[3].x___, temp[1].x___; 18: CMP temp[3].x, -temp[1].x___, temp[3].x___, none.0___; 19: MUL temp[1].x, temp[3].x___, const[4].z___; 20: MIN temp[3].x, none.1___, temp[1].x___; 21: DP3 temp[1].x, temp[8].xyz_, temp[8].xyz_; 22: RSQ temp[4].x, |temp[1].x___|; 23: MUL temp[4].x, temp[4].x___, temp[1].x___; 24: CMP temp[4].x, -temp[1].x___, temp[4].x___, none.0___; 25: MUL temp[1].x, temp[4].x___, const[4].z___; 26: MIN temp[4].x, none.1___, temp[1].x___; 27: MUL temp[1].x, temp[3].x___, temp[4].x___; 28: MOV temp[13].y, const[4]._x__; 29: MOV temp[13].x, const[4].w___; 30: ADD temp[3].xy, temp[2].xy__, temp[13].xy__; 31: TEX temp[9].xyz, temp[3].xy__, 2D[0]; 32: DP3 temp[3].x, temp[9].xyz_, temp[9].xyz_; 33: RSQ temp[4].x, |temp[3].x___|; 34: MUL temp[4].x, temp[4].x___, temp[3].x___; 35: CMP temp[4].x, -temp[3].x___, temp[4].x___, none.0___; 36: MUL temp[3].x, temp[4].x___, const[4].z___; 37: MIN temp[4].x, none.1___, temp[3].x___; 38: MUL temp[3].x, temp[1].x___, temp[4].x___; 39: ADD temp[1].xy, temp[2].xy__, const[4].ww__; 40: TEX temp[10].xyz, temp[1].xy__, 2D[0]; 41: DP3 temp[1].x, temp[10].xyz_, temp[10].xyz_; 42: RSQ temp[4].x, |temp[1].x___|; 43: MUL temp[4].x, temp[4].x___, temp[1].x___; 44: CMP temp[4].x, -temp[1].x___, temp[4].x___, none.0___; 45: MUL temp[1].x, temp[4].x___, const[4].z___; 46: MIN temp[4].x, none.1___, temp[1].x___; 47: MUL temp[1].x, temp[3].x___, temp[4].x___; 48: MOV temp[3].w, none.___1; 49: MUL temp[5].xy, temp[0].xy__, (1 - temp[1]).xx__; 50: MAD temp[0].xy, temp[2].xy__, temp[1].xx__, temp[5].xy__; 51: TEX temp[11].xyz, temp[0].xy__, 2D[0]; 52: MOV temp[3].xyz, temp[11].xyz_; 53: MUL output[0], temp[3], const[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: RCP temp[0].x, input[0].w___; 1: MUL temp[1].xy, const[2].xy__, temp[0].xx__; 2: MAD temp[0].xy, input[0].xy__, temp[1].xy__, const[1].xy__; 3: TEX temp[2].xyz, input[1].xy__, 2D[1]; 4: ADD temp[3].xyz, temp[2].xyz_, none.-H-H-H_; 5: DP3 temp[1].x, temp[3].xyz_, temp[3].xyz_; 6: RSQ temp[4].x, |temp[1].x___|; 7: MUL temp[5].xy, temp[3].xy__, temp[4].xx__; 8: MAD temp[3].xy, temp[5].xy__, const[3].xy__, temp[0].xy__; 9: ADD temp[6].xy, temp[3].xy__, const[4].xx__; 10: TEX temp[7].xyz, temp[6].xy__, 2D[0]; 11: MOV temp[8].x, const[4].x___; 12: MOV temp[8].y, const[4]._w__; 13: ADD temp[9].xy, temp[3].xy__, temp[8].xy__; 14: TEX temp[10].xyz, temp[9].xy__, 2D[0]; 15: DP3 temp[9].x, temp[7].xyz_, temp[7].xyz_; 16: RSQ temp[11].x, |temp[9].x___|; 17: MUL temp[12].x, temp[11].x___, temp[9].x___; 18: CMP temp[13].x, -temp[9].x___, temp[12].x___, none.0___; 19: MUL temp[9].x, temp[13].x___, const[4].z___; 20: MIN temp[14].x, none.1___, temp[9].x___; 21: DP3 temp[9].x, temp[10].xyz_, temp[10].xyz_; 22: RSQ temp[15].x, |temp[9].x___|; 23: MUL temp[16].x, temp[15].x___, temp[9].x___; 24: CMP temp[17].x, -temp[9].x___, temp[16].x___, none.0___; 25: MUL temp[9].x, temp[17].x___, const[4].z___; 26: MIN temp[18].x, none.1___, temp[9].x___; 27: MUL temp[9].x, temp[14].x___, temp[18].x___; 28: MOV temp[19].y, const[4]._x__; 29: MOV temp[19].x, const[4].w___; 30: ADD temp[14].xy, temp[3].xy__, temp[19].xy__; 31: TEX temp[20].xyz, temp[14].xy__, 2D[0]; 32: DP3 temp[14].x, temp[20].xyz_, temp[20].xyz_; 33: RSQ temp[21].x, |temp[14].x___|; 34: MUL temp[22].x, temp[21].x___, temp[14].x___; 35: CMP temp[23].x, -temp[14].x___, temp[22].x___, none.0___; 36: MUL temp[14].x, temp[23].x___, const[4].z___; 37: MIN temp[24].x, none.1___, temp[14].x___; 38: MUL temp[14].x, temp[9].x___, temp[24].x___; 39: ADD temp[25].xy, temp[3].xy__, const[4].ww__; 40: TEX temp[26].xyz, temp[25].xy__, 2D[0]; 41: DP3 temp[25].x, temp[26].xyz_, temp[26].xyz_; 42: RSQ temp[27].x, |temp[25].x___|; 43: MUL temp[28].x, temp[27].x___, temp[25].x___; 44: CMP temp[29].x, -temp[25].x___, temp[28].x___, none.0___; 45: MUL temp[25].x, temp[29].x___, const[4].z___; 46: MIN temp[30].x, none.1___, temp[25].x___; 47: MUL temp[25].x, temp[14].x___, temp[30].x___; 48: MOV temp[14].w, none.___1; 49: MUL temp[31].xy, temp[0].xy__, (1 - temp[25]).xx__; 50: MAD temp[32].xy, temp[3].xy__, temp[25].xx__, temp[31].xy__; 51: TEX temp[33].xyz, temp[32].xy__, 2D[0]; 52: MOV temp[14].xyz, temp[33].xyz_; 53: MUL output[0], temp[14], const[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.w = input[0] REPL_ALPHA temp[0].x RCP, src0.w 1: src0.xyz = const[2], src1.xyz = temp[0] MAD temp[1].xy, src0.xy_, src1.xx_, src0.000 2: src0.xyz = input[0], src1.xyz = temp[1], src2.xyz = const[1] MAD temp[0].xy, src0.xy_, src1.xy_, src2.xy_ 3: TEX temp[2].xyz, input[1].xy__, 2D[1]; 4: src0.xyz = temp[2] MAD temp[3].xyz, src0.xyz, src0.111, -src0.HHH 5: src0.xyz = temp[3] DP3 temp[1].x, src0.xyz, src0.xyz 6: src0.xyz = temp[1] REPL_ALPHA temp[4].x RSQ, |src0.x| 7: src0.xyz = temp[3], src1.xyz = temp[4] MAD temp[5].xy, src0.xy_, src1.xx_, src0.000 8: src0.xyz = temp[5], src1.xyz = const[3], src2.xyz = temp[0] MAD temp[3].xy, src0.xy_, src1.xy_, src2.xy_ 9: src0.xyz = temp[3], src1.xyz = const[4] MAD temp[6].xy, src0.xy_, src0.111, src1.xx_ 10: TEX temp[7].xyz, temp[6].xy__, 2D[0]; 11: src0.xyz = const[4] MAD temp[8].x, src0.x__, src0.111, src0.000 12: src0.w = const[4] MAD temp[8].y, src0._w_, src0.111, src0.000 13: src0.xyz = temp[3], src1.xyz = temp[8] MAD temp[9].xy, src0.xy_, src0.111, src1.xy_ 14: TEX temp[10].xyz, temp[9].xy__, 2D[0]; 15: src0.xyz = temp[7] DP3 temp[9].x, src0.xyz, src0.xyz 16: src0.xyz = temp[9] REPL_ALPHA temp[11].x RSQ, |src0.x| 17: src0.xyz = temp[11], src1.xyz = temp[9] MAD temp[12].x, src0.x__, src1.x__, src0.000 18: src0.xyz = temp[12], src1.xyz = temp[9] CMP temp[13].x, src0.0__, src0.x__, -src1.x__ 19: src0.xyz = temp[13], src1.xyz = const[4] MAD temp[9].x, src0.x__, src1.z__, src0.000 20: src0.xyz = temp[9] MIN temp[14].x, src0.1__, src0.x__ 21: src0.xyz = temp[10] DP3 temp[9].x, src0.xyz, src0.xyz 22: src0.xyz = temp[9] REPL_ALPHA temp[15].x RSQ, |src0.x| 23: src0.xyz = temp[15], src1.xyz = temp[9] MAD temp[16].x, src0.x__, src1.x__, src0.000 24: src0.xyz = temp[16], src1.xyz = temp[9] CMP temp[17].x, src0.0__, src0.x__, -src1.x__ 25: src0.xyz = temp[17], src1.xyz = const[4] MAD temp[9].x, src0.x__, src1.z__, src0.000 26: src0.xyz = temp[9] MIN temp[18].x, src0.1__, src0.x__ 27: src0.xyz = temp[14], src1.xyz = temp[18] MAD temp[9].x, src0.x__, src1.x__, src0.000 28: src0.xyz = const[4] MAD temp[19].y, src0._x_, src0.111, src0.000 29: src0.w = const[4] MAD temp[19].x, src0.w__, src0.111, src0.000 30: src0.xyz = temp[3], src1.xyz = temp[19] MAD temp[14].xy, src0.xy_, src0.111, src1.xy_ 31: TEX temp[20].xyz, temp[14].xy__, 2D[0]; 32: src0.xyz = temp[20] DP3 temp[14].x, src0.xyz, src0.xyz 33: src0.xyz = temp[14] REPL_ALPHA temp[21].x RSQ, |src0.x| 34: src0.xyz = temp[21], src1.xyz = temp[14] MAD temp[22].x, src0.x__, src1.x__, src0.000 35: src0.xyz = temp[22], src1.xyz = temp[14] CMP temp[23].x, src0.0__, src0.x__, -src1.x__ 36: src0.xyz = temp[23], src1.xyz = const[4] MAD temp[14].x, src0.x__, src1.z__, src0.000 37: src0.xyz = temp[14] MIN temp[24].x, src0.1__, src0.x__ 38: src0.xyz = temp[9], src1.xyz = temp[24] MAD temp[14].x, src0.x__, src1.x__, src0.000 39: src0.xyz = temp[3], src0.w = const[4] MAD temp[25].xy, src0.xy_, src0.111, src0.ww_ 40: TEX temp[26].xyz, temp[25].xy__, 2D[0]; 41: src0.xyz = temp[26] DP3 temp[25].x, src0.xyz, src0.xyz 42: src0.xyz = temp[25] REPL_ALPHA temp[27].x RSQ, |src0.x| 43: src0.xyz = temp[27], src1.xyz = temp[25] MAD temp[28].x, src0.x__, src1.x__, src0.000 44: src0.xyz = temp[28], src1.xyz = temp[25] CMP temp[29].x, src0.0__, src0.x__, -src1.x__ 45: src0.xyz = temp[29], src1.xyz = const[4] MAD temp[25].x, src0.x__, src1.z__, src0.000 46: src0.xyz = temp[25] MIN temp[30].x, src0.1__, src0.x__ 47: src0.xyz = temp[14], src1.xyz = temp[30] MAD temp[25].x, src0.x__, src1.x__, src0.000 48: MAD temp[14].w, src0.1, src0.1, src0.0 49: src0.xyz = temp[25], src1.xyz = temp[0], srcp.xyz = (1 - src0) MAD temp[31].xy, src1.xy_, srcp.xx_, src0.000 50: src0.xyz = temp[3], src1.xyz = temp[25], src2.xyz = temp[31] MAD temp[32].xy, src0.xy_, src1.xx_, src2.xy_ 51: TEX temp[33].xyz, temp[32].xy__, 2D[0]; 52: src0.xyz = temp[33] MAD temp[14].xyz, src0.xyz, src0.111, src0.000 53: src0.xyz = temp[14], src0.w = temp[14], src1.xyz = const[0], src1.w = const[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[2].xyz, input[1].xy__, 2D[1]; 2: src0.w = input[0] REPL_ALPHA temp[0].x RCP, src0.w 3: src0.xyz = const[2], src1.xyz = temp[0] MAD temp[1].xy, src0.xy_, src1.xx_, src0.000 MAD temp[14].w, src0.1, src0.1, src0.0 4: src0.xyz = input[0], src1.xyz = temp[1], src2.xyz = const[1] MAD temp[0].xy, src0.xy_, src1.xy_, src2.xy_ 5: src0.xyz = temp[2] MAD temp[3].xyz, src0.xyz, src0.111, -src0.HHH 6: src0.xyz = temp[3] DP3 temp[1].x, src0.xyz, src0.xyz 7: src0.xyz = temp[1] REPL_ALPHA temp[4].x RSQ, |src0.x| 8: src0.xyz = temp[3], src1.xyz = temp[4] MAD temp[5].xy, src0.xy_, src1.xx_, src0.000 9: src0.xyz = temp[5], src1.xyz = const[3], src2.xyz = temp[0] MAD temp[3].xy, src0.xy_, src1.xy_, src2.xy_ 10: src0.xyz = temp[3], src1.xyz = const[4] MAD temp[6].xy, src0.xy_, src0.111, src1.xx_ 11: src0.xyz = temp[3], src0.w = const[4] MAD temp[25].xy, src0.xy_, src0.111, src0.ww_ 12: src0.w = const[4] MAD temp[19].x, src0.w__, src0.111, src0.000 13: src0.xyz = const[4] MAD temp[19].y, src0._x_, src0.111, src0.000 14: src0.w = const[4] MAD temp[8].y, src0._w_, src0.111, src0.000 15: src0.xyz = const[4] MAD temp[8].x, src0.x__, src0.111, src0.000 16: src0.xyz = temp[3], src1.xyz = temp[8] MAD temp[9].xy, src0.xy_, src0.111, src1.xy_ 17: BEGIN_TEX; 18: TEX temp[7].xyz, temp[6].xy__, 2D[0]; 19: TEX temp[26].xyz, temp[25].xy__, 2D[0]; 20: TEX temp[10].xyz, temp[9].xy__, 2D[0]; 21: src0.xyz = temp[26] DP3 temp[25].x, src0.xyz, src0.xyz 22: src0.xyz = temp[25] REPL_ALPHA temp[27].x RSQ, |src0.x| 23: src0.xyz = temp[27], src1.xyz = temp[25] MAD temp[28].x, src0.x__, src1.x__, src0.000 24: src0.xyz = temp[28], src1.xyz = temp[25] CMP temp[29].x, src0.0__, src0.x__, -src1.x__ 25: src0.xyz = temp[29], src1.xyz = const[4] MAD temp[25].x, src0.x__, src1.z__, src0.000 26: src0.xyz = temp[25] MIN temp[30].x, src0.1__, src0.x__ 27: src0.xyz = temp[7] DP3 temp[9].x, src0.xyz, src0.xyz 28: src0.xyz = temp[9] REPL_ALPHA temp[11].x RSQ, |src0.x| 29: src0.xyz = temp[11], src1.xyz = temp[9] MAD temp[12].x, src0.x__, src1.x__, src0.000 30: src0.xyz = temp[12], src1.xyz = temp[9] CMP temp[13].x, src0.0__, src0.x__, -src1.x__ 31: src0.xyz = temp[13], src1.xyz = const[4] MAD temp[9].x, src0.x__, src1.z__, src0.000 32: src0.xyz = temp[9] MIN temp[14].x, src0.1__, src0.x__ 33: src0.xyz = temp[10] DP3 temp[9].x, src0.xyz, src0.xyz 34: src0.xyz = temp[9] REPL_ALPHA temp[15].x RSQ, |src0.x| 35: src0.xyz = temp[15], src1.xyz = temp[9] MAD temp[16].x, src0.x__, src1.x__, src0.000 36: src0.xyz = temp[16], src1.xyz = temp[9] CMP temp[17].x, src0.0__, src0.x__, -src1.x__ 37: src0.xyz = temp[17], src1.xyz = const[4] MAD temp[9].x, src0.x__, src1.z__, src0.000 38: src0.xyz = temp[9] MIN temp[18].x, src0.1__, src0.x__ 39: src0.xyz = temp[14], src1.xyz = temp[18] MAD temp[9].x, src0.x__, src1.x__, src0.000 40: src0.xyz = temp[3], src1.xyz = temp[19] MAD temp[14].xy, src0.xy_, src0.111, src1.xy_ 41: BEGIN_TEX; 42: TEX temp[20].xyz, temp[14].xy__, 2D[0]; 43: src0.xyz = temp[20] DP3 temp[14].x, src0.xyz, src0.xyz 44: src0.xyz = temp[14] REPL_ALPHA temp[21].x RSQ, |src0.x| 45: src0.xyz = temp[21], src1.xyz = temp[14] MAD temp[22].x, src0.x__, src1.x__, src0.000 46: src0.xyz = temp[22], src1.xyz = temp[14] CMP temp[23].x, src0.0__, src0.x__, -src1.x__ 47: src0.xyz = temp[23], src1.xyz = const[4] MAD temp[14].x, src0.x__, src1.z__, src0.000 48: src0.xyz = temp[14] MIN temp[24].x, src0.1__, src0.x__ 49: src0.xyz = temp[9], src1.xyz = temp[24] MAD temp[14].x, src0.x__, src1.x__, src0.000 50: src0.xyz = temp[14], src1.xyz = temp[30] MAD temp[25].x, src0.x__, src1.x__, src0.000 51: src0.xyz = temp[25], src1.xyz = temp[0], srcp.xyz = (1 - src0) MAD temp[31].xy, src1.xy_, srcp.xx_, src0.000 52: src0.xyz = temp[3], src1.xyz = temp[25], src2.xyz = temp[31] MAD temp[32].xy, src0.xy_, src1.xx_, src2.xy_ 53: BEGIN_TEX; 54: TEX temp[33].xyz, temp[32].xy__, 2D[0]; 55: src0.xyz = temp[33] MAD temp[14].xyz, src0.xyz, src0.111, src0.000 56: src0.xyz = temp[14], src0.w = temp[14], src1.xyz = const[0], src1.w = const[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[3].xyz, temp[1].xy__, 2D[1]; 2: src0.w = temp[0] REPL_ALPHA temp[1].x RCP, src0.w 3: src0.xyz = const[2], src1.xyz = temp[1] MAD temp[2].xy, src0.xy_, src1.xx_, src0.000 MAD temp[5].w, src0.1, src0.1, src0.0 4: src0.xyz = temp[0], src1.xyz = temp[2], src2.xyz = const[1] MAD temp[1].xy, src0.xy_, src1.xy_, src2.xy_ 5: src0.xyz = temp[3] MAD temp[0].xyz, src0.xyz, src0.111, -src0.HHH 6: src0.xyz = temp[0] DP3 temp[2].x, src0.xyz, src0.xyz 7: src0.xyz = temp[2] REPL_ALPHA temp[2].x RSQ, |src0.x| 8: src0.xyz = temp[0], src1.xyz = temp[2] MAD temp[2].xy, src0.xy_, src1.xx_, src0.000 9: src0.xyz = temp[2], src1.xyz = const[3], src2.xyz = temp[1] MAD temp[0].xy, src0.xy_, src1.xy_, src2.xy_ 10: src0.xyz = temp[0], src1.xyz = const[4] MAD temp[2].xy, src0.xy_, src0.111, src1.xx_ 11: src0.xyz = temp[0], src0.w = const[4] MAD temp[7].xy, src0.xy_, src0.111, src0.ww_ 12: src0.w = const[4] MAD temp[6].x, src0.w__, src0.111, src0.000 13: src0.xyz = const[4] MAD temp[6].y, src0._x_, src0.111, src0.000 14: src0.w = const[4] MAD temp[3].y, src0._w_, src0.111, src0.000 15: src0.xyz = const[4] MAD temp[3].x, src0.x__, src0.111, src0.000 16: src0.xyz = temp[0], src1.xyz = temp[3] MAD temp[3].xy, src0.xy_, src0.111, src1.xy_ 17: BEGIN_TEX; 18: TEX temp[2].xyz, temp[2].xy__, 2D[0]; 19: TEX temp[8].xyz, temp[7].xy__, 2D[0]; 20: TEX temp[4].xyz, temp[3].xy__, 2D[0]; 21: src0.xyz = temp[8] DP3 temp[7].x, src0.xyz, src0.xyz 22: src0.xyz = temp[7] REPL_ALPHA temp[8].x RSQ, |src0.x| 23: src0.xyz = temp[8], src1.xyz = temp[7] MAD temp[8].x, src0.x__, src1.x__, src0.000 24: src0.xyz = temp[8], src1.xyz = temp[7] CMP temp[8].x, src0.0__, src0.x__, -src1.x__ 25: src0.xyz = temp[8], src1.xyz = const[4] MAD temp[7].x, src0.x__, src1.z__, src0.000 26: src0.xyz = temp[7] MIN temp[8].x, src0.1__, src0.x__ 27: src0.xyz = temp[2] DP3 temp[3].x, src0.xyz, src0.xyz 28: src0.xyz = temp[3] REPL_ALPHA temp[2].x RSQ, |src0.x| 29: src0.xyz = temp[2], src1.xyz = temp[3] MAD temp[2].x, src0.x__, src1.x__, src0.000 30: src0.xyz = temp[2], src1.xyz = temp[3] CMP temp[2].x, src0.0__, src0.x__, -src1.x__ 31: src0.xyz = temp[2], src1.xyz = const[4] MAD temp[3].x, src0.x__, src1.z__, src0.000 32: src0.xyz = temp[3] MIN temp[5].x, src0.1__, src0.x__ 33: src0.xyz = temp[4] DP3 temp[3].x, src0.xyz, src0.xyz 34: src0.xyz = temp[3] REPL_ALPHA temp[2].x RSQ, |src0.x| 35: src0.xyz = temp[2], src1.xyz = temp[3] MAD temp[2].x, src0.x__, src1.x__, src0.000 36: src0.xyz = temp[2], src1.xyz = temp[3] CMP temp[2].x, src0.0__, src0.x__, -src1.x__ 37: src0.xyz = temp[2], src1.xyz = const[4] MAD temp[3].x, src0.x__, src1.z__, src0.000 38: src0.xyz = temp[3] MIN temp[2].x, src0.1__, src0.x__ 39: src0.xyz = temp[5], src1.xyz = temp[2] MAD temp[3].x, src0.x__, src1.x__, src0.000 40: src0.xyz = temp[0], src1.xyz = temp[6] MAD temp[5].xy, src0.xy_, src0.111, src1.xy_ 41: BEGIN_TEX; 42: TEX temp[2].xyz, temp[5].xy__, 2D[0]; 43: src0.xyz = temp[2] DP3 temp[5].x, src0.xyz, src0.xyz 44: src0.xyz = temp[5] REPL_ALPHA temp[2].x RSQ, |src0.x| 45: src0.xyz = temp[2], src1.xyz = temp[5] MAD temp[2].x, src0.x__, src1.x__, src0.000 46: src0.xyz = temp[2], src1.xyz = temp[5] CMP temp[2].x, src0.0__, src0.x__, -src1.x__ 47: src0.xyz = temp[2], src1.xyz = const[4] MAD temp[5].x, src0.x__, src1.z__, src0.000 48: src0.xyz = temp[5] MIN temp[2].x, src0.1__, src0.x__ 49: src0.xyz = temp[3], src1.xyz = temp[2] MAD temp[5].x, src0.x__, src1.x__, src0.000 50: src0.xyz = temp[5], src1.xyz = temp[8] MAD temp[7].x, src0.x__, src1.x__, src0.000 51: src0.xyz = temp[7], src1.xyz = temp[1], srcp.xyz = (1 - src0) MAD temp[1].xy, src1.xy_, srcp.xx_, src0.000 52: src0.xyz = temp[0], src1.xyz = temp[7], src2.xyz = temp[1] MAD temp[0].xy, src0.xy_, src1.xx_, src2.xy_ 53: BEGIN_TEX; 54: TEX temp[0].xyz, temp[0].xy__, 2D[0]; 55: src0.xyz = temp[0] MAD temp[5].xyz, src0.xyz, src0.111, src0.000 56: src0.xyz = temp[5], src0.w = temp[5], src1.xyz = const[0], src1.w = const[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 pc=16************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 14, tex_end: 0 (code_addr: 00000380) TEX: TEX t3, t1, texture[1] (000088c1) 0: xyz: t0 t0 t0 bias-> t1.x (00840000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xxx t0.xxx t0.xxx op: 05004081 w: t0.w t0.x t0.x op: 05000009 1: xyz: c2 t1 t0 bias-> t2.xy (01880062) w: t0 t0 t0 bias-> t5.w (00940000) xyz: c2.xyz t1.xxx 0.0 op: 00050280 w: 1.0 1.0 0.0 op: 00040891 2: xyz: t0 t2 c1 bias-> t1.xy (01861080) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t2.xyz c1.xyz op: 00020200 w: t0.x t0.x t0.x op: 00000000 3: xyz: t3 t0 t0 bias-> t0.xyz (03800003) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz 1.0 -0.5 op: 000d8a80 w: t3.x t3.x t3.x op: 00000000 4: xyz: t0 t0 t0 bias-> t2.x (00880000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t0.xyz t0.xxx op: 00804000 w: t0.x t0.x t0.x op: 00000000 5: xyz: t2 t0 t0 bias-> t2.x (00880002) w: t0 t0 t0 bias-> (00000000) xyz: t2.xxx t2.xxx t2.xxx op: 05004081 w: |t2.x| t2.x t2.x op: 05800040 6: xyz: t0 t2 t0 bias-> t2.xy (01880080) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t2.xxx 0.0 op: 00050280 w: t0.x t0.x t0.x op: 00000000 7: xyz: t2 c3 t1 bias-> t0.xy (018018c2) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz c3.xyz t1.xyz op: 00020200 w: t2.x t2.x t2.x op: 00000000 8: xyz: t0 c4 t0 bias-> t2.xy (01880900) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz 1.0 c4.xxx op: 00014a80 w: t0.x t0.x t0.x op: 00000000 9: xyz: t0 t0 t0 bias-> t7.xy (019c0000) w: c4 t0 t0 bias-> (00000024) xyz: t0.xyz 1.0 c4.www op: 00030a80 w: t0.x t0.x t0.x op: 00000000 10: xyz: t0 t0 t0 bias-> t6.x (00980000) w: c4 t0 t0 bias-> (00000024) xyz: c4.www 1.0 0.0 op: 00050a8c w: t0.x t0.x t0.x op: 00000000 11: xyz: c4 t0 t0 bias-> t6.y (01180024) w: t0 t0 t0 bias-> (00000000) xyz: c4.xxx 1.0 0.0 op: 00050a81 w: c4.x c4.x c4.x op: 00000000 12: xyz: t0 t0 t0 bias-> t3.y (010c0000) w: c4 t0 t0 bias-> (00000024) xyz: c4.www 1.0 0.0 op: 00050a8c w: t0.x t0.x t0.x op: 00000000 13: xyz: c4 t0 t0 bias-> t3.x (008c0024) w: t0 t0 t0 bias-> (00000000) xyz: c4.xyz 1.0 0.0 op: 00050a80 w: c4.x c4.x c4.x op: 00000000 14: xyz: t0 t3 t0 bias-> t3.xy (018c00c0) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz 1.0 t3.xyz op: 00010a80 w: t0.x t0.x t0.x op: 00000000 NODE 1: alu_offset: 15, tex_offset: 1, alu_end: 19, tex_end: 2 (code_addr: 000414cf) TEX: TEX t2, t2, texture[0] (00008082) TEX t8, t7, texture[0] (00008207) TEX t4, t3, texture[0] (00008103) 15: xyz: t8 t0 t0 bias-> t7.x (009c0008) w: t0 t0 t0 bias-> (00000000) xyz: t8.xyz t8.xyz t8.xxx op: 00804000 w: t8.x t8.x t8.x op: 00000000 16: xyz: t7 t0 t0 bias-> t8.x (00a00007) w: t0 t0 t0 bias-> (00000000) xyz: t7.xxx t7.xxx t7.xxx op: 05004081 w: |t7.x| t7.x t7.x op: 05800040 17: xyz: t8 t7 t0 bias-> t8.x (00a001c8) w: t0 t0 t0 bias-> (00000000) xyz: t8.xyz t7.xyz 0.0 op: 00050200 w: t8.x t8.x t8.x op: 00000000 18: xyz: t8 t7 t0 bias-> t8.x (00a001c8) w: t0 t0 t0 bias-> (00000000) xyz: 0.0 t8.xyz -t7.xyz op: 04090014 w: t8.x t8.x t8.x op: 00000000 19: xyz: t8 c4 t0 bias-> t7.x (009c0908) w: t0 t0 t0 bias-> (00000000) xyz: t8.xyz c4.zzz 0.0 op: 00050380 w: t8.x t8.x t8.x op: 00000000 20: xyz: t7 t0 t0 bias-> t8.x (00a00007) w: t0 t0 t0 bias-> (00000000) xyz: 1.0 t7.xyz t7.xxx op: 02004015 w: t7.x t7.x t7.x op: 00000000 21: xyz: t2 t0 t0 bias-> t3.x (008c0002) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz t2.xyz t2.xxx op: 00804000 w: t2.x t2.x t2.x op: 00000000 22: xyz: t3 t0 t0 bias-> t2.x (00880003) w: t0 t0 t0 bias-> (00000000) xyz: t3.xxx t3.xxx t3.xxx op: 05004081 w: |t3.x| t3.x t3.x op: 05800040 23: xyz: t2 t3 t0 bias-> t2.x (008800c2) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz t3.xyz 0.0 op: 00050200 w: t2.x t2.x t2.x op: 00000000 24: xyz: t2 t3 t0 bias-> t2.x (008800c2) w: t0 t0 t0 bias-> (00000000) xyz: 0.0 t2.xyz -t3.xyz op: 04090014 w: t2.x t2.x t2.x op: 00000000 25: xyz: t2 c4 t0 bias-> t3.x (008c0902) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz c4.zzz 0.0 op: 00050380 w: t2.x t2.x t2.x op: 00000000 26: xyz: t3 t0 t0 bias-> t5.x (00940003) w: t0 t0 t0 bias-> (00000000) xyz: 1.0 t3.xyz t3.xxx op: 02004015 w: t3.x t3.x t3.x op: 00000000 27: xyz: t4 t0 t0 bias-> t3.x (008c0004) w: t0 t0 t0 bias-> (00000000) xyz: t4.xyz t4.xyz t4.xxx op: 00804000 w: t4.x t4.x t4.x op: 00000000 28: xyz: t3 t0 t0 bias-> t2.x (00880003) w: t0 t0 t0 bias-> (00000000) xyz: t3.xxx t3.xxx t3.xxx op: 05004081 w: |t3.x| t3.x t3.x op: 05800040 29: xyz: t2 t3 t0 bias-> t2.x (008800c2) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz t3.xyz 0.0 op: 00050200 w: t2.x t2.x t2.x op: 00000000 30: xyz: t2 t3 t0 bias-> t2.x (008800c2) w: t0 t0 t0 bias-> (00000000) xyz: 0.0 t2.xyz -t3.xyz op: 04090014 w: t2.x t2.x t2.x op: 00000000 31: xyz: t2 c4 t0 bias-> t3.x (008c0902) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz c4.zzz 0.0 op: 00050380 w: t2.x t2.x t2.x op: 00000000 32: xyz: t3 t0 t0 bias-> t2.x (00880003) w: t0 t0 t0 bias-> (00000000) xyz: 1.0 t3.xyz t3.xxx op: 02004015 w: t3.x t3.x t3.x op: 00000000 33: xyz: t5 t2 t0 bias-> t3.x (008c0085) w: t0 t0 t0 bias-> (00000000) xyz: t5.xyz t2.xyz 0.0 op: 00050200 w: t5.x t5.x t5.x op: 00000000 34: xyz: t0 t6 t0 bias-> t5.xy (01940180) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz 1.0 t6.xyz op: 00010a80 w: t0.x t0.x t0.x op: 00000000 NODE 2: alu_offset: 35, tex_offset: 4, alu_end: 9, tex_end: 0 (code_addr: 00004263) TEX: TEX t2, t5, texture[0] (00008085) 35: xyz: t2 t0 t0 bias-> t5.x (00940002) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz t2.xyz t2.xxx op: 00804000 w: t2.x t2.x t2.x op: 00000000 36: xyz: t5 t0 t0 bias-> t2.x (00880005) w: t0 t0 t0 bias-> (00000000) xyz: t5.xxx t5.xxx t5.xxx op: 05004081 w: |t5.x| t5.x t5.x op: 05800040 37: xyz: t2 t5 t0 bias-> t2.x (00880142) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz t5.xyz 0.0 op: 00050200 w: t2.x t2.x t2.x op: 00000000 38: xyz: t2 t5 t0 bias-> t2.x (00880142) w: t0 t0 t0 bias-> (00000000) xyz: 0.0 t2.xyz -t5.xyz op: 04090014 w: t2.x t2.x t2.x op: 00000000 39: xyz: t2 c4 t0 bias-> t5.x (00940902) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz c4.zzz 0.0 op: 00050380 w: t2.x t2.x t2.x op: 00000000 40: xyz: t5 t0 t0 bias-> t2.x (00880005) w: t0 t0 t0 bias-> (00000000) xyz: 1.0 t5.xyz t5.xxx op: 02004015 w: t5.x t5.x t5.x op: 00000000 41: xyz: t3 t2 t0 bias-> t5.x (00940083) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz t2.xyz 0.0 op: 00050200 w: t3.x t3.x t3.x op: 00000000 42: xyz: t5 t8 t0 bias-> t7.x (009c0205) w: t0 t0 t0 bias-> (00000000) xyz: t5.xyz t8.xyz 0.0 op: 80050200 NOP w: t5.x t5.x t5.x op: 00000000 43: xyz: t7 t1 t0 inv -> t1.xy (01840047) w: t0 t0 t0 bias-> (00000000) xyz: t1.xyz srcp.xxx 0.0 op: 00650804 w: t7.x t7.x t7.x op: 00000000 44: xyz: t0 t7 t1 bias-> t0.xy (018011c0) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t7.xxx t1.xyz op: 00020280 w: t0.x t0.x t0.x op: 00000000 NODE 3: alu_offset: 45, tex_offset: 5, alu_end: 1, tex_end: 0 (code_addr: 0040506d) TEX: TEX t0, t0, texture[0] (00008000) 45: xyz: t0 t0 t0 bias-> t5.xyz (03940000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.x t0.x t0.x op: 00000000 46: xyz: t5 c0 t0 bias-> o0.xyz (1c000805) w: t5 c0 t0 bias-> o0.w (01000805) xyz: t5.xyz c0.xyz 0.0 op: 00050200 w: t5.w c0.w 0.0 op: 00040509 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL OUT[2], GENERIC[11] DCL CONST[0..7] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[2].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: MUL TEMP[0], CONST[4], IN[0].xxxx 5: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 6: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 7: MAD TEMP[1], CONST[7], IN[0].wwww, TEMP[0] 8: MOV OUT[0], TEMP[1] 9: MOV OUT[1], TEMP[1] 10: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV temp[2], temp[1]; 9: MOV output[1], temp[1]; 10: MOV output[0], temp[2]; 11: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV temp[2], temp[1]; 9: MOV output[1], temp[1]; 10: MOV output[0], temp[2]; 11: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV temp[2], temp[1]; 9: MOV output[1], temp[1]; 10: MOV output[0], temp[2]; 11: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV temp[2], temp[1]; 9: MOV output[1], temp[1]; 10: MOV output[0], temp[2]; 11: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV temp[2], temp[1]; 9: MOV output[1], temp[1]; 10: MOV output[0], temp[2]; 11: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV temp[2], temp[1]; 9: MOV output[1], temp[1]; 10: MOV output[0], temp[2]; 11: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV temp[2], temp[1]; 9: MOV output[1], temp[1]; 10: MOV output[0], temp[2]; 11: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV output[1], temp[1]; 9: MOV output[0], temp[1]; 10: MOV output[3], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV output[1], temp[1]; 9: MOV output[0], temp[1]; 10: MOV output[3], temp[1]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV output[1], temp[1]; 9: MOV output[0], temp[1]; 10: MOV output[3], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV output[1], temp[1]; 9: MOV output[0], temp[1]; 10: MOV output[3], temp[1]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[1], const[7], input[0].wwww, temp[0]; 8: MOV output[1], temp[1]; 9: MOV output[0], temp[1]; 10: MOV output[3], temp[1]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00304204 dst: 2o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 5: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 6: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 7: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 8: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 9: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 10: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL IN[1], GENERIC[11], PERSPECTIVE DCL IN[2], GENERIC[12], PERSPECTIVE DCL IN[3], GENERIC[13], PERSPECTIVE DCL IN[4], GENERIC[14], PERSPECTIVE DCL IN[5], GENERIC[15], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[0] DCL CONST[3..12] DCL TEMP[0..6] IMM FLT32 { -0.5000, 2.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[5].xyyy, SAMP[5], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[7].xxxx 2: TEX TEMP[2].xyz, IN[5].xyyy, SAMP[2], 2D 3: TEX TEMP[3].xyz, IN[5].xyyy, SAMP[3], 2D 4: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[12].xyzz 5: MAD TEMP[3].xyz, TEMP[2].xyzz, CONST[11].xyzz, TEMP[4].xyzz 6: ADD TEMP[2].xyz, TEMP[0].xyzz, TEMP[3].xyzz 7: TEX TEMP[0].xyz, IN[5].xyyy, SAMP[6], 2D 8: ADD TEMP[3].xyz, TEMP[0].xyzz, IMM[0].xxxy 9: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 10: RSQ TEMP[4].x, TEMP[0].xxxx 11: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[4].xxxx 12: MOV TEMP[3].xyz, -IN[3].xyzx 13: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz 14: MUL TEMP[5].xyz, TEMP[4].xxxx, TEMP[0].xyzz 15: MUL TEMP[4].xyz, IMM[0].yyyy, TEMP[5].xyzz 16: ADD TEMP[5].xyz, TEMP[3].xyzz, -TEMP[4].xyzz 17: MUL TEMP[3].xyz, TEMP[5].xxxx, IN[2].xyzz 18: MAD TEMP[4].xyz, TEMP[5].yyyy, IN[1].xyzz, TEMP[3].xyzz 19: MAD TEMP[3].xyz, TEMP[5].zzzz, IN[0].xyzz, TEMP[4].xyzz 20: TEX TEMP[4].xyz, IN[5].xyyy, SAMP[1], 2D 21: MUL TEMP[5], CONST[3], TEMP[3].xxxx 22: MAD TEMP[6].xyz, CONST[4], TEMP[3].yyyy, TEMP[5] 23: MAD TEMP[5].xyz, CONST[5], TEMP[3].zzzz, TEMP[6] 24: TEX TEMP[3].xyz, TEMP[5].xyzz, SAMP[0], CUBE 25: MAD TEMP[5].xyz, TEMP[4].xyzz, TEMP[3].xyzz, TEMP[2].xyzz 26: DP3 TEMP[2].x, IN[4].xyzz, IN[4].xyzz 27: RSQ TEMP[3].x, TEMP[2].xxxx 28: MUL TEMP[2].xyz, IN[4].xyzz, TEMP[3].xxxx 29: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[2].xyzz 30: MAX TEMP[0].x, TEMP[3].xxxx, IMM[0].zzzz 31: MUL TEMP[2].xyz, CONST[9].xyzz, TEMP[0].xxxx 32: MAD TEMP[0].xyz, TEMP[2].xyzz, CONST[0].xyzz, CONST[10].xyzz 33: MUL TEMP[2].xyz, TEMP[5].xyzz, TEMP[0].xyzz 34: TEX TEMP[0].xyz, IN[5].xyyy, SAMP[4], 2D 35: MAD TEMP[1].xyz, TEMP[0].xyzx, CONST[8].xyzx, TEMP[2].xyzx 36: MOV OUT[0], TEMP[1] 37: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[5].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[7].xxxx; 2: TEX temp[2].xyz, input[5].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[5].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[12].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[11].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[5].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[13].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[4].xyz, const[13].yyyy, temp[5].xyzz; 16: ADD temp[5].xyz, temp[3].xyzz, -temp[4].xyzz; 17: MUL temp[3].xyz, temp[5].xxxx, input[2].xyzz; 18: MAD temp[4].xyz, temp[5].yyyy, input[1].xyzz, temp[3].xyzz; 19: MAD temp[3].xyz, temp[5].zzzz, input[0].xyzz, temp[4].xyzz; 20: TEX temp[4].xyz, input[5].xyyy, 2D[1]; 21: MUL temp[5], const[3], temp[3].xxxx; 22: MAD temp[6].xyz, const[4], temp[3].yyyy, temp[5]; 23: MAD temp[5].xyz, const[5], temp[3].zzzz, temp[6]; 24: TEX temp[3].xyz, temp[5].xyzz, CUBE[0]; 25: MAD temp[5].xyz, temp[4].xyzz, temp[3].xyzz, temp[2].xyzz; 26: DP3 temp[2].x, input[4].xyzz, input[4].xyzz; 27: RSQ temp[3].x, temp[2].xxxx; 28: MUL temp[2].xyz, input[4].xyzz, temp[3].xxxx; 29: DP3 temp[3].x, temp[0].xyzz, temp[2].xyzz; 30: MAX temp[0].x, temp[3].xxxx, const[13].zzzz; 31: MUL temp[2].xyz, const[9].xyzz, temp[0].xxxx; 32: MAD temp[0].xyz, temp[2].xyzz, const[0].xyzz, const[10].xyzz; 33: MUL temp[2].xyz, temp[5].xyzz, temp[0].xyzz; 34: TEX temp[0].xyz, input[5].xyyy, 2D[4]; 35: MAD temp[1].xyz, temp[0].xyzx, const[8].xyzx, temp[2].xyzx; 36: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[5].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[7].xxxx; 2: TEX temp[2].xyz, input[5].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[5].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[12].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[11].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[5].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[13].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[4].xyz, const[13].yyyy, temp[5].xyzz; 16: ADD temp[5].xyz, temp[3].xyzz, -temp[4].xyzz; 17: MUL temp[3].xyz, temp[5].xxxx, input[2].xyzz; 18: MAD temp[4].xyz, temp[5].yyyy, input[1].xyzz, temp[3].xyzz; 19: MAD temp[3].xyz, temp[5].zzzz, input[0].xyzz, temp[4].xyzz; 20: TEX temp[4].xyz, input[5].xyyy, 2D[1]; 21: MUL temp[5], const[3], temp[3].xxxx; 22: MAD temp[6].xyz, const[4], temp[3].yyyy, temp[5]; 23: MAD temp[5].xyz, const[5], temp[3].zzzz, temp[6]; 24: TEX temp[3].xyz, temp[5].xyzz, CUBE[0]; 25: MAD temp[5].xyz, temp[4].xyzz, temp[3].xyzz, temp[2].xyzz; 26: DP3 temp[2].x, input[4].xyzz, input[4].xyzz; 27: RSQ temp[3].x, temp[2].xxxx; 28: MUL temp[2].xyz, input[4].xyzz, temp[3].xxxx; 29: DP3 temp[3].x, temp[0].xyzz, temp[2].xyzz; 30: MAX temp[0].x, temp[3].xxxx, const[13].zzzz; 31: MUL temp[2].xyz, const[9].xyzz, temp[0].xxxx; 32: MAD temp[0].xyz, temp[2].xyzz, const[0].xyzz, const[10].xyzz; 33: MUL temp[2].xyz, temp[5].xyzz, temp[0].xyzz; 34: TEX temp[0].xyz, input[5].xyyy, 2D[4]; 35: MAD temp[1].xyz, temp[0].xyzx, const[8].xyzx, temp[2].xyzx; 36: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[5].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[7].xxxx; 2: TEX temp[2].xyz, input[5].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[5].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[12].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[11].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[5].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[13].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[4].xyz, const[13].yyyy, temp[5].xyzz; 16: ADD temp[5].xyz, temp[3].xyzz, -temp[4].xyzz; 17: MUL temp[3].xyz, temp[5].xxxx, input[2].xyzz; 18: MAD temp[4].xyz, temp[5].yyyy, input[1].xyzz, temp[3].xyzz; 19: MAD temp[3].xyz, temp[5].zzzz, input[0].xyzz, temp[4].xyzz; 20: TEX temp[4].xyz, input[5].xyyy, 2D[1]; 21: MUL temp[5], const[3], temp[3].xxxx; 22: MAD temp[6].xyz, const[4], temp[3].yyyy, temp[5]; 23: MAD temp[5].xyz, const[5], temp[3].zzzz, temp[6]; 24: TEX temp[3].xyz, temp[5].xyzz, CUBE[0]; 25: MAD temp[5].xyz, temp[4].xyzz, temp[3].xyzz, temp[2].xyzz; 26: DP3 temp[2].x, input[4].xyzz, input[4].xyzz; 27: RSQ temp[3].x, temp[2].xxxx; 28: MUL temp[2].xyz, input[4].xyzz, temp[3].xxxx; 29: DP3 temp[3].x, temp[0].xyzz, temp[2].xyzz; 30: MAX temp[0].x, temp[3].xxxx, const[13].zzzz; 31: MUL temp[2].xyz, const[9].xyzz, temp[0].xxxx; 32: MAD temp[0].xyz, temp[2].xyzz, const[0].xyzz, const[10].xyzz; 33: MUL temp[2].xyz, temp[5].xyzz, temp[0].xyzz; 34: TEX temp[0].xyz, input[5].xyyy, 2D[4]; 35: MAD temp[1].xyz, temp[0].xyzx, const[8].xyzx, temp[2].xyzx; 36: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[5].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[7].xxxx; 2: TEX temp[2].xyz, input[5].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[5].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[12].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[11].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[5].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[13].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[4].xyz, const[13].yyyy, temp[5].xyzz; 16: ADD temp[5].xyz, temp[3].xyzz, -temp[4].xyzz; 17: MUL temp[3].xyz, temp[5].xxxx, input[2].xyzz; 18: MAD temp[4].xyz, temp[5].yyyy, input[1].xyzz, temp[3].xyzz; 19: MAD temp[3].xyz, temp[5].zzzz, input[0].xyzz, temp[4].xyzz; 20: TEX temp[4].xyz, input[5].xyyy, 2D[1]; 21: MUL temp[5], const[3], temp[3].xxxx; 22: MAD temp[6].xyz, const[4], temp[3].yyyy, temp[5]; 23: MAD temp[5].xyz, const[5], temp[3].zzzz, temp[6]; 24: TEX temp[3].xyz, temp[5].xyzz, CUBE[0]; 25: MAD temp[5].xyz, temp[4].xyzz, temp[3].xyzz, temp[2].xyzz; 26: DP3 temp[2].x, input[4].xyzz, input[4].xyzz; 27: RSQ temp[3].x, temp[2].xxxx; 28: MUL temp[2].xyz, input[4].xyzz, temp[3].xxxx; 29: DP3 temp[3].x, temp[0].xyzz, temp[2].xyzz; 30: MAX temp[0].x, temp[3].xxxx, const[13].zzzz; 31: MUL temp[2].xyz, const[9].xyzz, temp[0].xxxx; 32: MAD temp[0].xyz, temp[2].xyzz, const[0].xyzz, const[10].xyzz; 33: MUL temp[2].xyz, temp[5].xyzz, temp[0].xyzz; 34: TEX temp[0].xyz, input[5].xyyy, 2D[4]; 35: MAD temp[1].xyz, temp[0].xyzx, const[8].xyzx, temp[2].xyzx; 36: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[5].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[7].xxxx; 2: TEX temp[2].xyz, input[5].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[5].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[12].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[11].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[5].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[13].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[4].xyz, const[13].yyyy, temp[5].xyzz; 16: ADD temp[5].xyz, temp[3].xyzz, -temp[4].xyzz; 17: MUL temp[3].xyz, temp[5].xxxx, input[2].xyzz; 18: MAD temp[4].xyz, temp[5].yyyy, input[1].xyzz, temp[3].xyzz; 19: MAD temp[3].xyz, temp[5].zzzz, input[0].xyzz, temp[4].xyzz; 20: TEX temp[4].xyz, input[5].xyyy, 2D[1]; 21: MUL temp[5], const[3], temp[3].xxxx; 22: MAD temp[6].xyz, const[4], temp[3].yyyy, temp[5]; 23: MAD temp[5].xyz, const[5], temp[3].zzzz, temp[6]; 24: TEX temp[3].xyz, temp[5].xyzz, CUBE[0]; 25: MAD temp[5].xyz, temp[4].xyzz, temp[3].xyzz, temp[2].xyzz; 26: DP3 temp[2].x, input[4].xyzz, input[4].xyzz; 27: RSQ temp[3].x, temp[2].xxxx; 28: MUL temp[2].xyz, input[4].xyzz, temp[3].xxxx; 29: DP3 temp[3].x, temp[0].xyzz, temp[2].xyzz; 30: MAX temp[0].x, temp[3].xxxx, const[13].zzzz; 31: MUL temp[2].xyz, const[9].xyzz, temp[0].xxxx; 32: MAD temp[0].xyz, temp[2].xyzz, const[0].xyzz, const[10].xyzz; 33: MUL temp[2].xyz, temp[5].xyzz, temp[0].xyzz; 34: TEX temp[0].xyz, input[5].xyyy, 2D[4]; 35: MAD temp[1].xyz, temp[0].xyzx, const[8].xyzx, temp[2].xyzx; 36: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[5].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[7].xxxx; 2: TEX temp[7], input[5].xyyy, 2D[2]; 3: MOV temp[2].xyz, temp[7]; 4: TEX temp[8], input[5].xyyy, 2D[3]; 5: MOV temp[3].xyz, temp[8]; 6: MUL temp[4].xyz, temp[3].xyzz, const[12].xyzz; 7: MAD temp[3].xyz, temp[2].xyzz, const[11].xyzz, temp[4].xyzz; 8: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 9: TEX temp[9], input[5].xyyy, 2D[6]; 10: MOV temp[0].xyz, temp[9]; 11: ADD temp[3].xyz, temp[0].xyzz, const[13].xxxy; 12: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 13: RSQ temp[4].x, temp[0].xxxx; 14: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 15: MOV temp[3].xyz, -input[3].xyzx; 16: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 17: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 18: MUL temp[4].xyz, const[13].yyyy, temp[5].xyzz; 19: ADD temp[5].xyz, temp[3].xyzz, -temp[4].xyzz; 20: MUL temp[3].xyz, temp[5].xxxx, input[2].xyzz; 21: MAD temp[4].xyz, temp[5].yyyy, input[1].xyzz, temp[3].xyzz; 22: MAD temp[3].xyz, temp[5].zzzz, input[0].xyzz, temp[4].xyzz; 23: TEX temp[10], input[5].xyyy, 2D[1]; 24: MOV temp[4].xyz, temp[10]; 25: MUL temp[5], const[3], temp[3].xxxx; 26: MAD temp[6].xyz, const[4], temp[3].yyyy, temp[5]; 27: MAD temp[5].xyz, const[5], temp[3].zzzz, temp[6]; 28: TEX temp[11], temp[5].xyzz, CUBE[0]; 29: MOV temp[3].xyz, temp[11]; 30: MAD temp[5].xyz, temp[4].xyzz, temp[3].xyzz, temp[2].xyzz; 31: DP3 temp[2].x, input[4].xyzz, input[4].xyzz; 32: RSQ temp[3].x, temp[2].xxxx; 33: MUL temp[2].xyz, input[4].xyzz, temp[3].xxxx; 34: DP3 temp[3].x, temp[0].xyzz, temp[2].xyzz; 35: MAX temp[0].x, temp[3].xxxx, const[13].zzzz; 36: MUL temp[2].xyz, const[9].xyzz, temp[0].xxxx; 37: MAD temp[0].xyz, temp[2].xyzz, const[0].xyzz, const[10].xyzz; 38: MUL temp[2].xyz, temp[5].xyzz, temp[0].xyzz; 39: TEX temp[12], input[5].xyyy, 2D[4]; 40: MOV temp[0].xyz, temp[12]; 41: MAD temp[1].xyz, temp[0].xyzx, const[8].xyzx, temp[2].xyzx; 42: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[5].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[7].xxxx; 2: TEX temp[7], input[5].xyyy, 2D[2]; 3: MOV temp[2].xyz, temp[7]; 4: TEX temp[8], input[5].xyyy, 2D[3]; 5: MOV temp[3].xyz, temp[8]; 6: MUL temp[4].xyz, temp[3].xyzz, const[12].xyzz; 7: MAD temp[3].xyz, temp[2].xyzz, const[11].xyzz, temp[4].xyzz; 8: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 9: TEX temp[9], input[5].xyyy, 2D[6]; 10: MOV temp[0].xyz, temp[9]; 11: ADD temp[3].xyz, temp[0].xyzz, const[13].xxxy; 12: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 13: RSQ temp[4].x, |temp[0].xxxx|; 14: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 15: MOV temp[3].xyz, -input[3].xyzx; 16: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 17: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 18: MUL temp[4].xyz, const[13].yyyy, temp[5].xyzz; 19: ADD temp[5].xyz, temp[3].xyzz, -temp[4].xyzz; 20: MUL temp[3].xyz, temp[5].xxxx, input[2].xyzz; 21: MAD temp[4].xyz, temp[5].yyyy, input[1].xyzz, temp[3].xyzz; 22: MAD temp[3].xyz, temp[5].zzzz, input[0].xyzz, temp[4].xyzz; 23: TEX temp[10], input[5].xyyy, 2D[1]; 24: MOV temp[4].xyz, temp[10]; 25: MUL temp[5], const[3], temp[3].xxxx; 26: MAD temp[6].xyz, const[4], temp[3].yyyy, temp[5]; 27: MAD temp[5].xyz, const[5], temp[3].zzzz, temp[6]; 28: TEX temp[11], temp[5].xyzz, CUBE[0]; 29: MOV temp[3].xyz, temp[11]; 30: MAD temp[5].xyz, temp[4].xyzz, temp[3].xyzz, temp[2].xyzz; 31: DP3 temp[2].x, input[4].xyzz, input[4].xyzz; 32: RSQ temp[3].x, |temp[2].xxxx|; 33: MUL temp[2].xyz, input[4].xyzz, temp[3].xxxx; 34: DP3 temp[3].x, temp[0].xyzz, temp[2].xyzz; 35: MAX temp[0].x, temp[3].xxxx, const[13].zzzz; 36: MUL temp[2].xyz, const[9].xyzz, temp[0].xxxx; 37: MAD temp[0].xyz, temp[2].xyzz, const[0].xyzz, const[10].xyzz; 38: MUL temp[2].xyz, temp[5].xyzz, temp[0].xyzz; 39: TEX temp[12], input[5].xyyy, 2D[4]; 40: MOV temp[0].xyz, temp[12]; 41: MAD temp[1].xyz, temp[0].xyzx, const[8].xyzx, temp[2].xyzx; 42: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[5].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[7].___x; 2: TEX temp[7].xyz, input[5].xy__, 2D[2]; 3: MOV temp[2].xyz, temp[7].xyz_; 4: TEX temp[8].xyz, input[5].xy__, 2D[3]; 5: MOV temp[3].xyz, temp[8].xyz_; 6: MUL temp[4].xyz, temp[3].xyz_, const[12].xyz_; 7: MAD temp[3].xyz, temp[2].xyz_, const[11].xyz_, temp[4].xyz_; 8: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 9: TEX temp[9].xyz, input[5].xy__, 2D[6]; 10: MOV temp[0].xyz, temp[9].xyz_; 11: ADD temp[3].xyz, temp[0].xyz_, const[13].xxx_; 12: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 13: RSQ temp[4].x, |temp[0].x___|; 14: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 15: MOV temp[3].xyz, -input[3].xyz_; 16: DP3 temp[4].x, temp[0].xyz_, temp[3].xyz_; 17: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 18: MUL temp[4].xyz, const[13].yyy_, temp[5].xyz_; 19: ADD temp[5].xyz, temp[3].xyz_, -temp[4].xyz_; 20: MUL temp[3].xyz, temp[5].xxx_, input[2].xyz_; 21: MAD temp[4].xyz, temp[5].yyy_, input[1].xyz_, temp[3].xyz_; 22: MAD temp[3].xyz, temp[5].zzz_, input[0].xyz_, temp[4].xyz_; 23: TEX temp[10].xyz, input[5].xy__, 2D[1]; 24: MOV temp[4].xyz, temp[10].xyz_; 25: MUL temp[5].xyz, const[3].xyz_, temp[3].xxx_; 26: MAD temp[6].xyz, const[4].xyz_, temp[3].yyy_, temp[5].xyz_; 27: MAD temp[5].xyz, const[5].xyz_, temp[3].zzz_, temp[6].xyz_; 28: TEX temp[11].xyz, temp[5].xyz_, CUBE[0]; 29: MOV temp[3].xyz, temp[11].xyz_; 30: MAD temp[5].xyz, temp[4].xyz_, temp[3].xyz_, temp[2].xyz_; 31: DP3 temp[2].x, input[4].xyz_, input[4].xyz_; 32: RSQ temp[3].x, |temp[2].x___|; 33: MUL temp[2].xyz, input[4].xyz_, temp[3].xxx_; 34: DP3 temp[3].x, temp[0].xyz_, temp[2].xyz_; 35: MAX temp[0].x, temp[3].x___, const[13].z___; 36: MUL temp[2].xyz, const[9].xyz_, temp[0].xxx_; 37: MAD temp[0].xyz, temp[2].xyz_, const[0].xyz_, const[10].xyz_; 38: MUL temp[2].xyz, temp[5].xyz_, temp[0].xyz_; 39: TEX temp[12].xyz, input[5].xy__, 2D[4]; 40: MOV temp[0].xyz, temp[12].xyz_; 41: MAD temp[1].xyz, temp[0].xyz_, const[8].xyz_, temp[2].xyz_; 42: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[5].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[7].___x; 2: TEX temp[7].xyz, input[5].xy__, 2D[2]; 3: MOV temp[2].xyz, temp[7].xyz_; 4: TEX temp[8].xyz, input[5].xy__, 2D[3]; 5: MOV temp[3].xyz, temp[8].xyz_; 6: MUL temp[4].xyz, temp[3].xyz_, const[12].xyz_; 7: MAD temp[3].xyz, temp[2].xyz_, const[11].xyz_, temp[4].xyz_; 8: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 9: TEX temp[9].xyz, input[5].xy__, 2D[6]; 10: MOV temp[0].xyz, temp[9].xyz_; 11: ADD temp[3].xyz, temp[0].xyz_, const[13].xxx_; 12: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 13: RSQ temp[4].x, |temp[0].x___|; 14: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 15: MOV temp[3].xyz, -input[3].xyz_; 16: DP3 temp[4].x, temp[0].xyz_, temp[3].xyz_; 17: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 18: MUL temp[4].xyz, const[13].yyy_, temp[5].xyz_; 19: ADD temp[5].xyz, temp[3].xyz_, -temp[4].xyz_; 20: MUL temp[3].xyz, temp[5].xxx_, input[2].xyz_; 21: MAD temp[4].xyz, temp[5].yyy_, input[1].xyz_, temp[3].xyz_; 22: MAD temp[3].xyz, temp[5].zzz_, input[0].xyz_, temp[4].xyz_; 23: TEX temp[10].xyz, input[5].xy__, 2D[1]; 24: MOV temp[4].xyz, temp[10].xyz_; 25: MUL temp[5].xyz, const[3].xyz_, temp[3].xxx_; 26: MAD temp[6].xyz, const[4].xyz_, temp[3].yyy_, temp[5].xyz_; 27: MAD temp[5].xyz, const[5].xyz_, temp[3].zzz_, temp[6].xyz_; 28: TEX temp[11].xyz, temp[5].xyz_, CUBE[0]; 29: MOV temp[3].xyz, temp[11].xyz_; 30: MAD temp[5].xyz, temp[4].xyz_, temp[3].xyz_, temp[2].xyz_; 31: DP3 temp[2].x, input[4].xyz_, input[4].xyz_; 32: RSQ temp[3].x, |temp[2].x___|; 33: MUL temp[2].xyz, input[4].xyz_, temp[3].xxx_; 34: DP3 temp[3].x, temp[0].xyz_, temp[2].xyz_; 35: MAX temp[0].x, temp[3].x___, const[13].z___; 36: MUL temp[2].xyz, const[9].xyz_, temp[0].xxx_; 37: MAD temp[0].xyz, temp[2].xyz_, const[0].xyz_, const[10].xyz_; 38: MUL temp[2].xyz, temp[5].xyz_, temp[0].xyz_; 39: TEX temp[12].xyz, input[5].xy__, 2D[4]; 40: MOV temp[0].xyz, temp[12].xyz_; 41: MAD temp[1].xyz, temp[0].xyz_, const[8].xyz_, temp[2].xyz_; 42: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[5].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[7].___x; 2: TEX temp[7].xyz, input[5].xy__, 2D[2]; 3: TEX temp[8].xyz, input[5].xy__, 2D[3]; 4: MUL temp[4].xyz, temp[8].xyz_, const[12].xyz_; 5: MAD temp[3].xyz, temp[7].xyz_, const[11].xyz_, temp[4].xyz_; 6: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 7: TEX temp[9].xyz, input[5].xy__, 2D[6]; 8: ADD temp[3].xyz, temp[9].xyz_, none.-H-H-H_; 9: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 10: RSQ temp[4].x, |temp[0].x___|; 11: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 12: DP3 temp[4].x, temp[0].xyz_, input[3].-x-y-z_; 13: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 14: MUL temp[4].xyz, const[13].yyy_, temp[5].xyz_; 15: ADD temp[5].xyz, input[3].-x-y-z_, -temp[4].xyz_; 16: MUL temp[3].xyz, temp[5].xxx_, input[2].xyz_; 17: MAD temp[4].xyz, temp[5].yyy_, input[1].xyz_, temp[3].xyz_; 18: MAD temp[3].xyz, temp[5].zzz_, input[0].xyz_, temp[4].xyz_; 19: TEX temp[10].xyz, input[5].xy__, 2D[1]; 20: MUL temp[5].xyz, const[3].xyz_, temp[3].xxx_; 21: MAD temp[6].xyz, const[4].xyz_, temp[3].yyy_, temp[5].xyz_; 22: MAD temp[5].xyz, const[5].xyz_, temp[3].zzz_, temp[6].xyz_; 23: TEX temp[11].xyz, temp[5].xyz_, CUBE[0]; 24: MAD temp[5].xyz, temp[10].xyz_, temp[11].xyz_, temp[2].xyz_; 25: DP3 temp[2].x, input[4].xyz_, input[4].xyz_; 26: RSQ temp[3].x, |temp[2].x___|; 27: MUL temp[2].xyz, input[4].xyz_, temp[3].xxx_; 28: DP3 temp[3].x, temp[0].xyz_, temp[2].xyz_; 29: MAX temp[0].x, temp[3].x___, none.0___; 30: MUL temp[2].xyz, const[9].xyz_, temp[0].xxx_; 31: MAD temp[0].xyz, temp[2].xyz_, const[0].xyz_, const[10].xyz_; 32: MUL temp[2].xyz, temp[5].xyz_, temp[0].xyz_; 33: TEX temp[12].xyz, input[5].xy__, 2D[4]; 34: MAD temp[1].xyz, temp[12].xyz_, const[8].xyz_, temp[2].xyz_; 35: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[5].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[7].___x; 2: TEX temp[7].xyz, input[5].xy__, 2D[2]; 3: TEX temp[8].xyz, input[5].xy__, 2D[3]; 4: MUL temp[4].xyz, temp[8].xyz_, const[12].xyz_; 5: MAD temp[3].xyz, temp[7].xyz_, const[11].xyz_, temp[4].xyz_; 6: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 7: TEX temp[9].xyz, input[5].xy__, 2D[6]; 8: ADD temp[3].xyz, temp[9].xyz_, none.-H-H-H_; 9: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 10: RSQ temp[4].x, |temp[0].x___|; 11: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 12: DP3 temp[4].x, temp[0].xyz_, input[3].-x-y-z_; 13: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 14: MUL temp[4].xyz, const[13].yyy_, temp[5].xyz_; 15: ADD temp[5].xyz, input[3].-x-y-z_, -temp[4].xyz_; 16: MUL temp[3].xyz, temp[5].xxx_, input[2].xyz_; 17: MAD temp[4].xyz, temp[5].yyy_, input[1].xyz_, temp[3].xyz_; 18: MAD temp[3].xyz, temp[5].zzz_, input[0].xyz_, temp[4].xyz_; 19: TEX temp[10].xyz, input[5].xy__, 2D[1]; 20: MUL temp[5].xyz, const[3].xyz_, temp[3].xxx_; 21: MAD temp[6].xyz, const[4].xyz_, temp[3].yyy_, temp[5].xyz_; 22: MAD temp[5].xyz, const[5].xyz_, temp[3].zzz_, temp[6].xyz_; 23: TEX temp[11].xyz, temp[5].xyz_, CUBE[0]; 24: MAD temp[5].xyz, temp[10].xyz_, temp[11].xyz_, temp[2].xyz_; 25: DP3 temp[2].x, input[4].xyz_, input[4].xyz_; 26: RSQ temp[3].x, |temp[2].x___|; 27: MUL temp[2].xyz, input[4].xyz_, temp[3].xxx_; 28: DP3 temp[3].x, temp[0].xyz_, temp[2].xyz_; 29: MAX temp[0].x, temp[3].x___, none.0___; 30: MUL temp[2].xyz, const[9].xyz_, temp[0].xxx_; 31: MAD temp[0].xyz, temp[2].xyz_, const[0].xyz_, const[10].xyz_; 32: MUL temp[2].xyz, temp[5].xyz_, temp[0].xyz_; 33: TEX temp[12].xyz, input[5].xy__, 2D[4]; 34: MAD temp[1].xyz, temp[12].xyz_, const[8].xyz_, temp[2].xyz_; 35: MOV output[0], temp[1]; CONST[10] = { -0.5000 2.0000 0.0000 0.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[5].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[4].___x; 2: TEX temp[7].xyz, input[5].xy__, 2D[2]; 3: TEX temp[8].xyz, input[5].xy__, 2D[3]; 4: MUL temp[4].xyz, temp[8].xyz_, const[9].xyz_; 5: MAD temp[3].xyz, temp[7].xyz_, const[8].xyz_, temp[4].xyz_; 6: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 7: TEX temp[9].xyz, input[5].xy__, 2D[6]; 8: ADD temp[3].xyz, temp[9].xyz_, none.-H-H-H_; 9: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 10: RSQ temp[4].x, |temp[0].x___|; 11: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 12: DP3 temp[4].x, temp[0].xyz_, input[3].-x-y-z_; 13: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 14: MUL temp[4].xyz, const[10].yyy_, temp[5].xyz_; 15: ADD temp[5].xyz, input[3].-x-y-z_, -temp[4].xyz_; 16: MUL temp[3].xyz, temp[5].xxx_, input[2].xyz_; 17: MAD temp[4].xyz, temp[5].yyy_, input[1].xyz_, temp[3].xyz_; 18: MAD temp[3].xyz, temp[5].zzz_, input[0].xyz_, temp[4].xyz_; 19: TEX temp[10].xyz, input[5].xy__, 2D[1]; 20: MUL temp[5].xyz, const[1].xyz_, temp[3].xxx_; 21: MAD temp[6].xyz, const[2].xyz_, temp[3].yyy_, temp[5].xyz_; 22: MAD temp[5].xyz, const[3].xyz_, temp[3].zzz_, temp[6].xyz_; 23: TEX temp[11].xyz, temp[5].xyz_, CUBE[0]; 24: MAD temp[5].xyz, temp[10].xyz_, temp[11].xyz_, temp[2].xyz_; 25: DP3 temp[2].x, input[4].xyz_, input[4].xyz_; 26: RSQ temp[3].x, |temp[2].x___|; 27: MUL temp[2].xyz, input[4].xyz_, temp[3].xxx_; 28: DP3 temp[3].x, temp[0].xyz_, temp[2].xyz_; 29: MAX temp[0].x, temp[3].x___, none.0___; 30: MUL temp[2].xyz, const[6].xyz_, temp[0].xxx_; 31: MAD temp[0].xyz, temp[2].xyz_, const[0].xyz_, const[7].xyz_; 32: MUL temp[2].xyz, temp[5].xyz_, temp[0].xyz_; 33: TEX temp[12].xyz, input[5].xy__, 2D[4]; 34: MAD temp[1].xyz, temp[12].xyz_, const[5].xyz_, temp[2].xyz_; 35: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[5].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[4].___x; 2: TEX temp[2].xyz, input[5].xy__, 2D[2]; 3: TEX temp[3].xyz, input[5].xy__, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyz_, const[9].xyz_; 5: MAD temp[5].xyz, temp[2].xyz_, const[8].xyz_, temp[4].xyz_; 6: ADD temp[6].xyz, temp[0].xyz_, temp[5].xyz_; 7: TEX temp[7].xyz, input[5].xy__, 2D[6]; 8: ADD temp[8].xyz, temp[7].xyz_, none.-H-H-H_; 9: DP3 temp[0].x, temp[8].xyz_, temp[8].xyz_; 10: RSQ temp[4].x, |temp[0].x___|; 11: MUL temp[0].xyz, temp[8].xyz_, temp[4].xxx_; 12: DP3 temp[4].x, temp[0].xyz_, input[3].-x-y-z_; 13: MUL temp[9].xyz, temp[4].xxx_, temp[0].xyz_; 14: MUL temp[10].xyz, const[10].yyy_, temp[9].xyz_; 15: ADD temp[11].xyz, input[3].-x-y-z_, -temp[10].xyz_; 16: MUL temp[12].xyz, temp[11].xxx_, input[2].xyz_; 17: MAD temp[13].xyz, temp[11].yyy_, input[1].xyz_, temp[12].xyz_; 18: MAD temp[14].xyz, temp[11].zzz_, input[0].xyz_, temp[13].xyz_; 19: TEX temp[15].xyz, input[5].xy__, 2D[1]; 20: MUL temp[16].xyz, const[1].xyz_, temp[14].xxx_; 21: MAD temp[17].xyz, const[2].xyz_, temp[14].yyy_, temp[16].xyz_; 22: MAD temp[18].xyz, const[3].xyz_, temp[14].zzz_, temp[17].xyz_; 23: TEX temp[19].xyz, temp[18].xyz_, CUBE[0]; 24: MAD temp[20].xyz, temp[15].xyz_, temp[19].xyz_, temp[6].xyz_; 25: DP3 temp[6].x, input[4].xyz_, input[4].xyz_; 26: RSQ temp[14].x, |temp[6].x___|; 27: MUL temp[21].xyz, input[4].xyz_, temp[14].xxx_; 28: DP3 temp[14].x, temp[0].xyz_, temp[21].xyz_; 29: MAX temp[0].x, temp[14].x___, none.0___; 30: MUL temp[22].xyz, const[6].xyz_, temp[0].xxx_; 31: MAD temp[0].xyz, temp[22].xyz_, const[0].xyz_, const[7].xyz_; 32: MUL temp[23].xyz, temp[20].xyz_, temp[0].xyz_; 33: TEX temp[24].xyz, input[5].xy__, 2D[4]; 34: MAD temp[1].xyz, temp[24].xyz_, const[5].xyz_, temp[23].xyz_; 35: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[5].xy__, 2D[5]; 1: src0.xyz = const[4], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: TEX temp[2].xyz, input[5].xy__, 2D[2]; 3: TEX temp[3].xyz, input[5].xy__, 2D[3]; 4: src0.xyz = temp[3], src1.xyz = const[9] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 5: src0.xyz = temp[2], src1.xyz = const[8], src2.xyz = temp[4] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[0], src1.xyz = temp[5] MAD temp[6].xyz, src0.xyz, src0.111, src1.xyz 7: TEX temp[7].xyz, input[5].xy__, 2D[6]; 8: src0.xyz = temp[7] MAD temp[8].xyz, src0.xyz, src0.111, -src0.HHH 9: src0.xyz = temp[8] DP3 temp[0].x, src0.xyz, src0.xyz 10: src0.xyz = temp[0] REPL_ALPHA temp[4].x RSQ, |src0.x| 11: src0.xyz = temp[8], src1.xyz = temp[4] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 12: src0.xyz = temp[0], src1.xyz = input[3] DP3 temp[4].x, src0.xyz, -src1.xyz 13: src0.xyz = temp[4], src1.xyz = temp[0] MAD temp[9].xyz, src0.xxx, src1.xyz, src0.000 14: src0.xyz = const[10], src1.xyz = temp[9] MAD temp[10].xyz, src0.yyy, src1.xyz, src0.000 15: src0.xyz = input[3], src1.xyz = temp[10] MAD temp[11].xyz, -src0.xyz, src0.111, -src1.xyz 16: src0.xyz = temp[11], src1.xyz = input[2] MAD temp[12].xyz, src0.xxx, src1.xyz, src0.000 17: src0.xyz = temp[11], src1.xyz = input[1], src2.xyz = temp[12] MAD temp[13].xyz, src0.yyy, src1.xyz, src2.xyz 18: src0.xyz = temp[11], src1.xyz = input[0], src2.xyz = temp[13] MAD temp[14].xyz, src0.zzz, src1.xyz, src2.xyz 19: TEX temp[15].xyz, input[5].xy__, 2D[1]; 20: src0.xyz = const[1], src1.xyz = temp[14] MAD temp[16].xyz, src0.xyz, src1.xxx, src0.000 21: src0.xyz = const[2], src1.xyz = temp[14], src2.xyz = temp[16] MAD temp[17].xyz, src0.xyz, src1.yyy, src2.xyz 22: src0.xyz = const[3], src1.xyz = temp[14], src2.xyz = temp[17] MAD temp[18].xyz, src0.xyz, src1.zzz, src2.xyz 23: TEX temp[19].xyz, temp[18].xyz_, CUBE[0]; 24: src0.xyz = temp[15], src1.xyz = temp[19], src2.xyz = temp[6] MAD temp[20].xyz, src0.xyz, src1.xyz, src2.xyz 25: src0.xyz = input[4] DP3 temp[6].x, src0.xyz, src0.xyz 26: src0.xyz = temp[6] REPL_ALPHA temp[14].x RSQ, |src0.x| 27: src0.xyz = input[4], src1.xyz = temp[14] MAD temp[21].xyz, src0.xyz, src1.xxx, src0.000 28: src0.xyz = temp[0], src1.xyz = temp[21] DP3 temp[14].x, src0.xyz, src1.xyz 29: src0.xyz = temp[14] MAX temp[0].x, src0.x__, src0.0__ 30: src0.xyz = const[6], src1.xyz = temp[0] MAD temp[22].xyz, src0.xyz, src1.xxx, src0.000 31: src0.xyz = temp[22], src1.xyz = const[0], src2.xyz = const[7] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 32: src0.xyz = temp[20], src1.xyz = temp[0] MAD temp[23].xyz, src0.xyz, src1.xyz, src0.000 33: TEX temp[24].xyz, input[5].xy__, 2D[4]; 34: src0.xyz = temp[24], src1.xyz = const[5], src2.xyz = temp[23] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 35: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[5].xy__, 2D[5]; 2: TEX temp[2].xyz, input[5].xy__, 2D[2]; 3: TEX temp[3].xyz, input[5].xy__, 2D[3]; 4: TEX temp[7].xyz, input[5].xy__, 2D[6]; 5: TEX temp[15].xyz, input[5].xy__, 2D[1]; 6: TEX temp[24].xyz, input[5].xy__, 2D[4]; 7: src0.xyz = temp[7], src0.w = temp[0], src1.xyz = const[4] MAD temp[8].xyz, src0.xyz, src0.111, -src0.HHH MAD temp[1].w, src0.w, src1.x, src0.0 8: src0.xyz = temp[3], src1.xyz = const[9] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 9: src0.xyz = temp[2], src1.xyz = const[8], src2.xyz = temp[4] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 10: src0.xyz = temp[0], src1.xyz = temp[5] MAD temp[6].xyz, src0.xyz, src0.111, src1.xyz 11: src0.xyz = temp[8] DP3 temp[0].x, src0.xyz, src0.xyz 12: src0.xyz = temp[0] REPL_ALPHA temp[4].x RSQ, |src0.x| 13: src0.xyz = temp[8], src1.xyz = temp[4] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 14: src0.xyz = temp[0], src1.xyz = input[3] DP3 temp[4].x, src0.xyz, -src1.xyz 15: src0.xyz = temp[4], src1.xyz = temp[0] MAD temp[9].xyz, src0.xxx, src1.xyz, src0.000 16: src0.xyz = const[10], src1.xyz = temp[9] MAD temp[10].xyz, src0.yyy, src1.xyz, src0.000 17: src0.xyz = input[3], src1.xyz = temp[10] MAD temp[11].xyz, -src0.xyz, src0.111, -src1.xyz 18: src0.xyz = temp[11], src1.xyz = input[2] MAD temp[12].xyz, src0.xxx, src1.xyz, src0.000 19: src0.xyz = temp[11], src1.xyz = input[1], src2.xyz = temp[12] MAD temp[13].xyz, src0.yyy, src1.xyz, src2.xyz 20: src0.xyz = temp[11], src1.xyz = input[0], src2.xyz = temp[13] MAD temp[14].xyz, src0.zzz, src1.xyz, src2.xyz 21: src0.xyz = const[1], src1.xyz = temp[14] MAD temp[16].xyz, src0.xyz, src1.xxx, src0.000 22: src0.xyz = const[2], src1.xyz = temp[14], src2.xyz = temp[16] MAD temp[17].xyz, src0.xyz, src1.yyy, src2.xyz 23: src0.xyz = const[3], src1.xyz = temp[14], src2.xyz = temp[17] MAD temp[18].xyz, src0.xyz, src1.zzz, src2.xyz 24: BEGIN_TEX; 25: TEX temp[19].xyz, temp[18].xyz_, CUBE[0]; 26: src0.xyz = temp[15], src1.xyz = temp[19], src2.xyz = temp[6] MAD temp[20].xyz, src0.xyz, src1.xyz, src2.xyz 27: src0.xyz = input[4] DP3 temp[6].x, src0.xyz, src0.xyz 28: src0.xyz = temp[6] REPL_ALPHA temp[14].x RSQ, |src0.x| 29: src0.xyz = input[4], src1.xyz = temp[14] MAD temp[21].xyz, src0.xyz, src1.xxx, src0.000 30: src0.xyz = temp[0], src1.xyz = temp[21] DP3 temp[14].x, src0.xyz, src1.xyz 31: src0.xyz = temp[14] MAX temp[0].x, src0.x__, src0.0__ 32: src0.xyz = const[6], src1.xyz = temp[0] MAD temp[22].xyz, src0.xyz, src1.xxx, src0.000 33: src0.xyz = temp[22], src1.xyz = const[0], src2.xyz = const[7] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 34: src0.xyz = temp[20], src1.xyz = temp[0] MAD temp[23].xyz, src0.xyz, src1.xyz, src0.000 35: src0.xyz = temp[24], src1.xyz = const[5], src2.xyz = temp[23] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 36: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[6], temp[5].xy__, 2D[5]; 2: TEX temp[7].xyz, temp[5].xy__, 2D[2]; 3: TEX temp[8].xyz, temp[5].xy__, 2D[3]; 4: TEX temp[9].xyz, temp[5].xy__, 2D[6]; 5: TEX temp[10].xyz, temp[5].xy__, 2D[1]; 6: TEX temp[11].xyz, temp[5].xy__, 2D[4]; 7: src0.xyz = temp[9], src0.w = temp[6], src1.xyz = const[4] MAD temp[9].xyz, src0.xyz, src0.111, -src0.HHH MAD temp[5].w, src0.w, src1.x, src0.0 8: src0.xyz = temp[8], src1.xyz = const[9] MAD temp[8].xyz, src0.xyz, src1.xyz, src0.000 9: src0.xyz = temp[7], src1.xyz = const[8], src2.xyz = temp[8] MAD temp[7].xyz, src0.xyz, src1.xyz, src2.xyz 10: src0.xyz = temp[6], src1.xyz = temp[7] MAD temp[7].xyz, src0.xyz, src0.111, src1.xyz 11: src0.xyz = temp[9] DP3 temp[6].x, src0.xyz, src0.xyz 12: src0.xyz = temp[6] REPL_ALPHA temp[8].x RSQ, |src0.x| 13: src0.xyz = temp[9], src1.xyz = temp[8] MAD temp[6].xyz, src0.xyz, src1.xxx, src0.000 14: src0.xyz = temp[6], src1.xyz = temp[3] DP3 temp[8].x, src0.xyz, -src1.xyz 15: src0.xyz = temp[8], src1.xyz = temp[6] MAD temp[8].xyz, src0.xxx, src1.xyz, src0.000 16: src0.xyz = const[10], src1.xyz = temp[8] MAD temp[8].xyz, src0.yyy, src1.xyz, src0.000 17: src0.xyz = temp[3], src1.xyz = temp[8] MAD temp[3].xyz, -src0.xyz, src0.111, -src1.xyz 18: src0.xyz = temp[3], src1.xyz = temp[2] MAD temp[2].xyz, src0.xxx, src1.xyz, src0.000 19: src0.xyz = temp[3], src1.xyz = temp[1], src2.xyz = temp[2] MAD temp[1].xyz, src0.yyy, src1.xyz, src2.xyz 20: src0.xyz = temp[3], src1.xyz = temp[0], src2.xyz = temp[1] MAD temp[0].xyz, src0.zzz, src1.xyz, src2.xyz 21: src0.xyz = const[1], src1.xyz = temp[0] MAD temp[1].xyz, src0.xyz, src1.xxx, src0.000 22: src0.xyz = const[2], src1.xyz = temp[0], src2.xyz = temp[1] MAD temp[1].xyz, src0.xyz, src1.yyy, src2.xyz 23: src0.xyz = const[3], src1.xyz = temp[0], src2.xyz = temp[1] MAD temp[1].xyz, src0.xyz, src1.zzz, src2.xyz 24: BEGIN_TEX; 25: TEX temp[1].xyz, temp[1].xyz_, CUBE[0]; 26: src0.xyz = temp[10], src1.xyz = temp[1], src2.xyz = temp[7] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 27: src0.xyz = temp[4] DP3 temp[7].x, src0.xyz, src0.xyz 28: src0.xyz = temp[7] REPL_ALPHA temp[0].x RSQ, |src0.x| 29: src0.xyz = temp[4], src1.xyz = temp[0] MAD temp[2].xyz, src0.xyz, src1.xxx, src0.000 30: src0.xyz = temp[6], src1.xyz = temp[2] DP3 temp[0].x, src0.xyz, src1.xyz 31: src0.xyz = temp[0] MAX temp[6].x, src0.x__, src0.0__ 32: src0.xyz = const[6], src1.xyz = temp[6] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 33: src0.xyz = temp[0], src1.xyz = const[0], src2.xyz = const[7] MAD temp[6].xyz, src0.xyz, src1.xyz, src2.xyz 34: src0.xyz = temp[1], src1.xyz = temp[6] MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 35: src0.xyz = temp[11], src1.xyz = const[5], src2.xyz = temp[0] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 36: src0.xyz = temp[5], src0.w = temp[5] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=17************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 16, tex_end: 5 (code_addr: 000a0400) TEX: TEX t6, t5, texture[5] (0000a985) TEX t7, t5, texture[2] (000091c5) TEX t8, t5, texture[3] (00009a05) TEX t9, t5, texture[6] (0000b245) TEX t10, t5, texture[1] (00008a85) TEX t11, t5, texture[4] (0000a2c5) 0: xyz: t9 c4 t0 bias-> t9.xyz (03a40909) w: t6 t0 t0 bias-> t5.w (00940006) xyz: t9.xyz 1.0 -0.5 op: 000d8a80 w: t6.w c4.x 0.0 op: 00040189 1: xyz: t8 c9 t0 bias-> t8.xyz (03a00a48) w: t0 t0 t0 bias-> (00000000) xyz: t8.xyz c9.xyz 0.0 op: 00050200 w: t8.x t8.x t8.x op: 00000000 2: xyz: t7 c8 t8 bias-> t7.xyz (039c8a07) w: t0 t0 t0 bias-> (00000000) xyz: t7.xyz c8.xyz t8.xyz op: 00020200 w: t7.x t7.x t7.x op: 00000000 3: xyz: t6 t7 t0 bias-> t7.xyz (039c01c6) w: t0 t0 t0 bias-> (00000000) xyz: t6.xyz 1.0 t7.xyz op: 00010a80 w: t6.x t6.x t6.x op: 00000000 4: xyz: t9 t0 t0 bias-> t6.x (00980009) w: t0 t0 t0 bias-> (00000000) xyz: t9.xyz t9.xyz t9.xxx op: 00804000 w: t9.x t9.x t9.x op: 00000000 5: xyz: t6 t0 t0 bias-> t8.x (00a00006) w: t0 t0 t0 bias-> (00000000) xyz: t6.xxx t6.xxx t6.xxx op: 05004081 w: |t6.x| t6.x t6.x op: 05800040 6: xyz: t9 t8 t0 bias-> t6.xyz (03980209) w: t0 t0 t0 bias-> (00000000) xyz: t9.xyz t8.xxx 0.0 op: 00050280 w: t9.x t9.x t9.x op: 00000000 7: xyz: t6 t3 t0 bias-> t8.x (00a000c6) w: t0 t0 t0 bias-> (00000000) xyz: t6.xyz -t3.xyz t6.xxx op: 00805200 w: t6.x t6.x t6.x op: 00000000 8: xyz: t8 t6 t0 bias-> t8.xyz (03a00188) w: t0 t0 t0 bias-> (00000000) xyz: t8.xxx t6.xyz 0.0 op: 00050201 w: t8.x t8.x t8.x op: 00000000 9: xyz: c10 t8 t0 bias-> t8.xyz (03a0022a) w: t0 t0 t0 bias-> (00000000) xyz: c10.yyy t8.xyz 0.0 op: 00050202 w: c10.x c10.x c10.x op: 00000000 10: xyz: t3 t8 t0 bias-> t3.xyz (038c0203) w: t0 t0 t0 bias-> (00000000) xyz: -t3.xyz 1.0 -t8.xyz op: 00090aa0 w: t3.x t3.x t3.x op: 00000000 11: xyz: t3 t2 t0 bias-> t2.xyz (03880083) w: t0 t0 t0 bias-> (00000000) xyz: t3.xxx t2.xyz 0.0 op: 00050201 w: t3.x t3.x t3.x op: 00000000 12: xyz: t3 t1 t2 bias-> t1.xyz (03842043) w: t0 t0 t0 bias-> (00000000) xyz: t3.yyy t1.xyz t2.xyz op: 00020202 w: t3.x t3.x t3.x op: 00000000 13: xyz: t3 t0 t1 bias-> t0.xyz (03801003) w: t0 t0 t0 bias-> (00000000) xyz: t3.zzz t0.xyz t1.xyz op: 00020203 w: t3.x t3.x t3.x op: 00000000 14: xyz: c1 t0 t0 bias-> t1.xyz (03840021) w: t0 t0 t0 bias-> (00000000) xyz: c1.xyz t0.xxx 0.0 op: 00050280 w: c1.x c1.x c1.x op: 00000000 15: xyz: c2 t0 t1 bias-> t1.xyz (03841022) w: t0 t0 t0 bias-> (00000000) xyz: c2.xyz t0.yyy t1.xyz op: 00020300 w: c2.x c2.x c2.x op: 00000000 16: xyz: c3 t0 t1 bias-> t1.xyz (03841023) w: t0 t0 t0 bias-> (00000000) xyz: c3.xyz t0.zzz t1.xyz op: 00020380 w: c3.x c3.x c3.x op: 00000000 NODE 1: alu_offset: 17, tex_offset: 6, alu_end: 10, tex_end: 0 (code_addr: 00406291) TEX: TEX t1, t1, texture[0] (00008041) 17: xyz: t10 t1 t7 bias-> t1.xyz (0384704a) w: t0 t0 t0 bias-> (00000000) xyz: t10.xyz t1.xyz t7.xyz op: 00020200 w: t10.x t10.x t10.x op: 00000000 18: xyz: t4 t0 t0 bias-> t7.x (009c0004) w: t0 t0 t0 bias-> (00000000) xyz: t4.xyz t4.xyz t4.xxx op: 00804000 w: t4.x t4.x t4.x op: 00000000 19: xyz: t7 t0 t0 bias-> t0.x (00800007) w: t0 t0 t0 bias-> (00000000) xyz: t7.xxx t7.xxx t7.xxx op: 05004081 w: |t7.x| t7.x t7.x op: 05800040 20: xyz: t4 t0 t0 bias-> t2.xyz (03880004) w: t0 t0 t0 bias-> (00000000) xyz: t4.xyz t0.xxx 0.0 op: 00050280 w: t4.x t4.x t4.x op: 00000000 21: xyz: t6 t2 t0 bias-> t0.x (00800086) w: t0 t0 t0 bias-> (00000000) xyz: t6.xyz t2.xyz t6.xxx op: 00804200 w: t6.x t6.x t6.x op: 00000000 22: xyz: t0 t0 t0 bias-> t6.x (00980000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz 0.0 t0.xxx op: 02804a00 w: t0.x t0.x t0.x op: 00000000 23: xyz: c6 t6 t0 bias-> t0.xyz (038001a6) w: t0 t0 t0 bias-> (00000000) xyz: c6.xyz t6.xxx 0.0 op: 00050280 w: c6.x c6.x c6.x op: 00000000 24: xyz: t0 c0 c7 bias-> t6.xyz (039a7800) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz c0.xyz c7.xyz op: 00020200 w: t0.x t0.x t0.x op: 00000000 25: xyz: t1 t6 t0 bias-> t0.xyz (03800181) w: t0 t0 t0 bias-> (00000000) xyz: t1.xyz t6.xyz 0.0 op: 00050200 w: t1.x t1.x t1.x op: 00000000 26: xyz: t11 c5 t0 bias-> t5.xyz (0394094b) w: t0 t0 t0 bias-> (00000000) xyz: t11.xyz c5.xyz t0.xyz op: 00020200 w: t11.x t11.x t11.x op: 00000000 27: xyz: t5 t0 t0 bias-> o0.xyz (1c000005) w: t5 t0 t0 bias-> o0.w (01000005) xyz: t5.xyz 1.0 0.0 op: 00050a80 w: t5.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL OUT[2], GENERIC[11] DCL OUT[3], GENERIC[12] DCL OUT[4], GENERIC[13] DCL OUT[5], GENERIC[14] DCL OUT[6], GENERIC[15] DCL CONST[0..9] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[6].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: DP3 OUT[5].x, CONST[4].xyzz, IN[2].xyzz 5: DP3 OUT[5].y, CONST[4].xyzz, IN[3].xyzz 6: DP3 OUT[5].z, CONST[4].xyzz, IN[4].xyzz 7: ADD TEMP[0].xyz, CONST[5].xyzz, -IN[0].xyzz 8: DP3 OUT[4].x, TEMP[0].xyzz, IN[2].xyzz 9: DP3 OUT[4].y, TEMP[0].xyzz, IN[3].xyzz 10: DP3 OUT[4].z, TEMP[0].xyzz, IN[4].xyzz 11: MOV OUT[3].xyz, IN[2].xyzx 12: MOV OUT[2].xyz, IN[3].xyzx 13: MOV OUT[1].xyz, IN[4].xyzx 14: MUL TEMP[0], CONST[6], IN[0].xxxx 15: MAD TEMP[1], CONST[7], IN[0].yyyy, TEMP[0] 16: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[1] 17: MAD OUT[0], CONST[9], IN[0].wwww, TEMP[0] 18: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[6].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[5].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[5].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[5].z, const[4].xyzz, input[4].xyzz; 7: ADD temp[0].xyz, const[5].xyzz, -input[0].xyzz; 8: DP3 output[4].x, temp[0].xyzz, input[2].xyzz; 9: DP3 output[4].y, temp[0].xyzz, input[3].xyzz; 10: DP3 output[4].z, temp[0].xyzz, input[4].xyzz; 11: MOV output[3].xyz, input[2].xyzx; 12: MOV output[2].xyz, input[3].xyzx; 13: MOV output[1].xyz, input[4].xyzx; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[6].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[5].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[5].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[5].z, const[4].xyzz, input[4].xyzz; 7: ADD temp[0].xyz, const[5].xyzz, -input[0].xyzz; 8: DP3 output[4].x, temp[0].xyzz, input[2].xyzz; 9: DP3 output[4].y, temp[0].xyzz, input[3].xyzz; 10: DP3 output[4].z, temp[0].xyzz, input[4].xyzz; 11: MOV output[3].xyz, input[2].xyzx; 12: MOV output[2].xyz, input[3].xyzx; 13: MOV output[1].xyz, input[4].xyzx; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[6].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[5].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[5].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[5].z, const[4].xyzz, input[4].xyzz; 7: ADD temp[0].xyz, const[5].xyzz, -input[0].xyzz; 8: DP3 output[4].x, temp[0].xyzz, input[2].xyzz; 9: DP3 output[4].y, temp[0].xyzz, input[3].xyzz; 10: DP3 output[4].z, temp[0].xyzz, input[4].xyzz; 11: MOV output[3].xyz, input[2].xyzx; 12: MOV output[2].xyz, input[3].xyzx; 13: MOV output[1].xyz, input[4].xyzx; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[6].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[5].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[5].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[5].z, const[4].xyzz, input[4].xyzz; 7: ADD temp[0].xyz, const[5].xyzz, -input[0].xyzz; 8: DP3 output[4].x, temp[0].xyzz, input[2].xyzz; 9: DP3 output[4].y, temp[0].xyzz, input[3].xyzz; 10: DP3 output[4].z, temp[0].xyzz, input[4].xyzz; 11: MOV output[3].xyz, input[2].xyzx; 12: MOV output[2].xyz, input[3].xyzx; 13: MOV output[1].xyz, input[4].xyzx; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[6].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP4 output[5].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[5].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[5].z, const[4].xyz0, input[4].xyz0; 7: ADD temp[0].xyz, const[5].xyzz, -input[0].xyzz; 8: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 9: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 10: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 11: MOV output[3].xyz, input[2].xyzx; 12: MOV output[2].xyz, input[3].xyzx; 13: MOV output[1].xyz, input[4].xyzx; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[6].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP4 output[5].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[5].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[5].z, const[4].xyz0, input[4].xyz0; 7: ADD temp[0].xyz, const[5].xyzz, -input[0].xyzz; 8: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 9: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 10: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 11: MOV output[3].xyz, input[2].xyzx; 12: MOV output[2].xyz, input[3].xyzx; 13: MOV output[1].xyz, input[4].xyzx; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[6].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[5].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[5].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[5].z, const[4].xyz0, input[4].xyz0; 7: ADD temp[0].xyz, const[5].xyz_, -input[0].xyz_; 8: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 9: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 10: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 11: MOV output[3].xyz, input[2].xyz_; 12: MOV output[2].xyz, input[3].xyz_; 13: MOV output[1].xyz, input[4].xyz_; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[6].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[5].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[5].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[5].z, const[4].xyz0, input[4].xyz0; 7: ADD temp[0].xyz, const[5].xyz_, -input[0].xyz_; 8: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 9: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 10: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 11: MOV output[3].xyz, input[2].xyz_; 12: MOV output[2].xyz, input[3].xyz_; 13: MOV output[1].xyz, input[4].xyz_; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[6].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[5].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[5].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[5].z, const[4].xyz0, input[4].xyz0; 7: ADD temp[0].xyz, const[5].xyz_, -input[0].xyz_; 8: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 9: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 10: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 11: MOV output[3].xyz, input[2].xyz_; 12: MOV output[2].xyz, input[3].xyz_; 13: MOV output[1].xyz, input[4].xyz_; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[6].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[5].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[5].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[5].z, const[4].xyz0, input[4].xyz0; 7: ADD temp[0].xyz, const[5].xyz_, -input[0].xyz_; 8: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 9: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 10: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 11: MOV output[3].xyz, input[2].xyz_; 12: MOV output[2].xyz, input[3].xyz_; 13: MOV output[1].xyz, input[4].xyz_; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[2], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[2]; 19: MOV output[7], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[6].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[5].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[5].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[5].z, const[4].xyz0, input[4].xyz0; 7: ADD temp[0].xyz, const[5].xyz_, -input[0].xyz_; 8: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 9: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 10: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 11: MOV output[3].xyz, input[2].xyz_; 12: MOV output[2].xyz, input[3].xyz_; 13: MOV output[1].xyz, input[4].xyz_; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[0], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[0]; 19: MOV output[7], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[6].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[5].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[5].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[5].z, const[4].xyz0, input[4].xyz0; 7: ADD temp[0].xyz, const[5].xyz_, -input[0].xyz_; 8: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 9: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 10: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 11: MOV output[3].xyz, input[2].xyz_; 12: MOV output[2].xyz, input[3].xyz_; 13: MOV output[1].xyz, input[4].xyz_; 14: MUL temp[0], const[6], input[0].xxxx; 15: MAD temp[1], const[7], input[0].yyyy, temp[0]; 16: MAD temp[0], const[8], input[0].zzzz, temp[1]; 17: MAD temp[0], const[9], input[0].wwww, temp[0]; 18: MOV output[0], temp[0]; 19: MOV output[7], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x0030c204 dst: 6o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x0010a201 dst: 5o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110041 reg: 2i swiz: X/ Y/ Z/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 5: op: 0x0020a201 dst: 5o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110061 reg: 3i swiz: X/ Y/ Z/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 6: op: 0x0040a201 dst: 5o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110081 reg: 4i swiz: X/ Y/ Z/ 0 src2: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 7: op: 0x00700003 dst: 0t op: VE_ADD src0: 0x01d100a2 reg: 5c swiz: X/ Y/ Z/ U src1: 0x1fd10001 reg: 0i swiz: -X/-Y/-Z/-U src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 8: op: 0x00108201 dst: 4o op: VE_DOT_PRODUCT src0: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src1: 0x01110041 reg: 2i swiz: X/ Y/ Z/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 9: op: 0x00208201 dst: 4o op: VE_DOT_PRODUCT src0: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src1: 0x01110061 reg: 3i swiz: X/ Y/ Z/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 10: op: 0x00408201 dst: 4o op: VE_DOT_PRODUCT src0: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src1: 0x01110081 reg: 4i swiz: X/ Y/ Z/ 0 src2: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 11: op: 0x00706203 dst: 3o op: VE_ADD src0: 0x01d10041 reg: 2i swiz: X/ Y/ Z/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 12: op: 0x00704203 dst: 2o op: VE_ADD src0: 0x01d10061 reg: 3i swiz: X/ Y/ Z/ U src1: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 13: op: 0x00702203 dst: 1o op: VE_ADD src0: 0x01d10081 reg: 4i swiz: X/ Y/ Z/ U src1: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 src2: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 14: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 15: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 16: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10102 reg: 8c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 17: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10122 reg: 9c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 18: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 19: op: 0x00f0e203 dst: 7o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL IN[1], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0..4] DCL TEMP[0..4] IMM FLT32 { -0.5000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[1].xyyy, SAMP[1], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[1].xxxx 2: TEX TEMP[2].xyz, IN[1].xyyy, SAMP[2], 2D 3: ADD TEMP[3].xyz, TEMP[2].xyzz, IMM[0].xxxy 4: DP3 TEMP[2].x, TEMP[3].xyzz, TEMP[3].xyzz 5: RSQ TEMP[4].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xxxx 7: DP3 TEMP[3].x, IN[0].xyzz, IN[0].xyzz 8: RSQ TEMP[4].x, TEMP[3].xxxx 9: MUL TEMP[3].xyz, IN[0].xyzz, TEMP[4].xxxx 10: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[3].xyzz 11: MAX TEMP[2].x, TEMP[4].xxxx, IMM[0].yyyy 12: MUL TEMP[3].xyz, CONST[3].xyzz, TEMP[2].xxxx 13: MAD TEMP[2].xyz, TEMP[3].xyzz, CONST[0].xyzz, CONST[4].xyzz 14: MUL TEMP[3].xyz, TEMP[0].xyzz, TEMP[2].xyzz 15: TEX TEMP[0].xyz, IN[1].xyyy, SAMP[0], 2D 16: MAD TEMP[1].xyz, TEMP[0].xyzx, CONST[2].xyzx, TEMP[3].xyzx 17: MOV OUT[0], TEMP[1] 18: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[2]; 3: ADD temp[3].xyz, temp[2].xyzz, const[5].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[5].yyyy; 12: MUL temp[3].xyz, const[3].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[4].xyzz; 14: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 15: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 16: MAD temp[1].xyz, temp[0].xyzx, const[2].xyzx, temp[3].xyzx; 17: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[2]; 3: ADD temp[3].xyz, temp[2].xyzz, const[5].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[5].yyyy; 12: MUL temp[3].xyz, const[3].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[4].xyzz; 14: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 15: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 16: MAD temp[1].xyz, temp[0].xyzx, const[2].xyzx, temp[3].xyzx; 17: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[2]; 3: ADD temp[3].xyz, temp[2].xyzz, const[5].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[5].yyyy; 12: MUL temp[3].xyz, const[3].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[4].xyzz; 14: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 15: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 16: MAD temp[1].xyz, temp[0].xyzx, const[2].xyzx, temp[3].xyzx; 17: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[2]; 3: ADD temp[3].xyz, temp[2].xyzz, const[5].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[5].yyyy; 12: MUL temp[3].xyz, const[3].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[4].xyzz; 14: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 15: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 16: MAD temp[1].xyz, temp[0].xyzx, const[2].xyzx, temp[3].xyzx; 17: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[2]; 3: ADD temp[3].xyz, temp[2].xyzz, const[5].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[5].yyyy; 12: MUL temp[3].xyz, const[3].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[4].xyzz; 14: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 15: TEX temp[0].xyz, input[1].xyyy, 2D[0]; 16: MAD temp[1].xyz, temp[0].xyzx, const[2].xyzx, temp[3].xyzx; 17: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[5], input[1].xyyy, 2D[2]; 3: MOV temp[2].xyz, temp[5]; 4: ADD temp[3].xyz, temp[2].xyzz, const[5].xxxy; 5: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 6: RSQ temp[4].x, temp[2].xxxx; 7: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 8: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 9: RSQ temp[4].x, temp[3].xxxx; 10: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 11: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 12: MAX temp[2].x, temp[4].xxxx, const[5].yyyy; 13: MUL temp[3].xyz, const[3].xyzz, temp[2].xxxx; 14: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[4].xyzz; 15: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 16: TEX temp[6], input[1].xyyy, 2D[0]; 17: MOV temp[0].xyz, temp[6]; 18: MAD temp[1].xyz, temp[0].xyzx, const[2].xyzx, temp[3].xyzx; 19: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[1]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[5], input[1].xyyy, 2D[2]; 3: MOV temp[2].xyz, temp[5]; 4: ADD temp[3].xyz, temp[2].xyzz, const[5].xxxy; 5: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 6: RSQ temp[4].x, |temp[2].xxxx|; 7: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 8: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 9: RSQ temp[4].x, |temp[3].xxxx|; 10: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 11: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 12: MAX temp[2].x, temp[4].xxxx, const[5].yyyy; 13: MUL temp[3].xyz, const[3].xyzz, temp[2].xxxx; 14: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[4].xyzz; 15: MUL temp[3].xyz, temp[0].xyzz, temp[2].xyzz; 16: TEX temp[6], input[1].xyyy, 2D[0]; 17: MOV temp[0].xyz, temp[6]; 18: MAD temp[1].xyz, temp[0].xyzx, const[2].xyzx, temp[3].xyzx; 19: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[2]; 3: MOV temp[2].xyz, temp[5].xyz_; 4: ADD temp[3].xyz, temp[2].xyz_, const[5].xxx_; 5: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 6: RSQ temp[4].x, |temp[2].x___|; 7: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 8: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 9: RSQ temp[4].x, |temp[3].x___|; 10: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 11: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 12: MAX temp[2].x, temp[4].x___, const[5].y___; 13: MUL temp[3].xyz, const[3].xyz_, temp[2].xxx_; 14: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[4].xyz_; 15: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 16: TEX temp[6].xyz, input[1].xy__, 2D[0]; 17: MOV temp[0].xyz, temp[6].xyz_; 18: MAD temp[1].xyz, temp[0].xyz_, const[2].xyz_, temp[3].xyz_; 19: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[2]; 3: MOV temp[2].xyz, temp[5].xyz_; 4: ADD temp[3].xyz, temp[2].xyz_, const[5].xxx_; 5: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 6: RSQ temp[4].x, |temp[2].x___|; 7: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 8: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 9: RSQ temp[4].x, |temp[3].x___|; 10: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 11: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 12: MAX temp[2].x, temp[4].x___, const[5].y___; 13: MUL temp[3].xyz, const[3].xyz_, temp[2].xxx_; 14: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[4].xyz_; 15: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 16: TEX temp[6].xyz, input[1].xy__, 2D[0]; 17: MOV temp[0].xyz, temp[6].xyz_; 18: MAD temp[1].xyz, temp[0].xyz_, const[2].xyz_, temp[3].xyz_; 19: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[2]; 3: ADD temp[3].xyz, temp[5].xyz_, none.-H-H-H_; 4: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 5: RSQ temp[4].x, |temp[2].x___|; 6: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 7: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 8: RSQ temp[4].x, |temp[3].x___|; 9: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 10: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 11: MAX temp[2].x, temp[4].x___, none.0___; 12: MUL temp[3].xyz, const[3].xyz_, temp[2].xxx_; 13: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[4].xyz_; 14: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 15: TEX temp[6].xyz, input[1].xy__, 2D[0]; 16: MAD temp[1].xyz, temp[6].xyz_, const[2].xyz_, temp[3].xyz_; 17: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[2]; 3: ADD temp[3].xyz, temp[5].xyz_, none.-H-H-H_; 4: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 5: RSQ temp[4].x, |temp[2].x___|; 6: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 7: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 8: RSQ temp[4].x, |temp[3].x___|; 9: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 10: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 11: MAX temp[2].x, temp[4].x___, none.0___; 12: MUL temp[3].xyz, const[3].xyz_, temp[2].xxx_; 13: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[4].xyz_; 14: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 15: TEX temp[6].xyz, input[1].xy__, 2D[0]; 16: MAD temp[1].xyz, temp[6].xyz_, const[2].xyz_, temp[3].xyz_; 17: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[2]; 3: ADD temp[3].xyz, temp[5].xyz_, none.-H-H-H_; 4: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 5: RSQ temp[4].x, |temp[2].x___|; 6: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 7: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 8: RSQ temp[4].x, |temp[3].x___|; 9: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 10: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 11: MAX temp[2].x, temp[4].x___, none.0___; 12: MUL temp[3].xyz, const[3].xyz_, temp[2].xxx_; 13: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[4].xyz_; 14: MUL temp[3].xyz, temp[0].xyz_, temp[2].xyz_; 15: TEX temp[6].xyz, input[1].xy__, 2D[0]; 16: MAD temp[1].xyz, temp[6].xyz_, const[2].xyz_, temp[3].xyz_; 17: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[2].xyz, input[1].xy__, 2D[2]; 3: ADD temp[3].xyz, temp[2].xyz_, none.-H-H-H_; 4: DP3 temp[4].x, temp[3].xyz_, temp[3].xyz_; 5: RSQ temp[5].x, |temp[4].x___|; 6: MUL temp[4].xyz, temp[3].xyz_, temp[5].xxx_; 7: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 8: RSQ temp[6].x, |temp[3].x___|; 9: MUL temp[7].xyz, input[0].xyz_, temp[6].xxx_; 10: DP3 temp[8].x, temp[4].xyz_, temp[7].xyz_; 11: MAX temp[4].x, temp[8].x___, none.0___; 12: MUL temp[9].xyz, const[3].xyz_, temp[4].xxx_; 13: MAD temp[10].xyz, temp[9].xyz_, const[0].xyz_, const[4].xyz_; 14: MUL temp[11].xyz, temp[0].xyz_, temp[10].xyz_; 15: TEX temp[12].xyz, input[1].xy__, 2D[0]; 16: MAD temp[1].xyz, temp[12].xyz_, const[2].xyz_, temp[11].xyz_; 17: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[1]; 1: src0.xyz = const[1], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: TEX temp[2].xyz, input[1].xy__, 2D[2]; 3: src0.xyz = temp[2] MAD temp[3].xyz, src0.xyz, src0.111, -src0.HHH 4: src0.xyz = temp[3] DP3 temp[4].x, src0.xyz, src0.xyz 5: src0.xyz = temp[4] REPL_ALPHA temp[5].x RSQ, |src0.x| 6: src0.xyz = temp[3], src1.xyz = temp[5] MAD temp[4].xyz, src0.xyz, src1.xxx, src0.000 7: src0.xyz = input[0] DP3 temp[3].x, src0.xyz, src0.xyz 8: src0.xyz = temp[3] REPL_ALPHA temp[6].x RSQ, |src0.x| 9: src0.xyz = input[0], src1.xyz = temp[6] MAD temp[7].xyz, src0.xyz, src1.xxx, src0.000 10: src0.xyz = temp[4], src1.xyz = temp[7] DP3 temp[8].x, src0.xyz, src1.xyz 11: src0.xyz = temp[8] MAX temp[4].x, src0.x__, src0.0__ 12: src0.xyz = const[3], src1.xyz = temp[4] MAD temp[9].xyz, src0.xyz, src1.xxx, src0.000 13: src0.xyz = temp[9], src1.xyz = const[0], src2.xyz = const[4] MAD temp[10].xyz, src0.xyz, src1.xyz, src2.xyz 14: src0.xyz = temp[0], src1.xyz = temp[10] MAD temp[11].xyz, src0.xyz, src1.xyz, src0.000 15: TEX temp[12].xyz, input[1].xy__, 2D[0]; 16: src0.xyz = temp[12], src1.xyz = const[2], src2.xyz = temp[11] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 17: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[1].xy__, 2D[1]; 2: TEX temp[2].xyz, input[1].xy__, 2D[2]; 3: TEX temp[12].xyz, input[1].xy__, 2D[0]; 4: src0.xyz = temp[2], src0.w = temp[0], src1.xyz = const[1] MAD temp[3].xyz, src0.xyz, src0.111, -src0.HHH MAD temp[1].w, src0.w, src1.x, src0.0 5: src0.xyz = temp[3] DP3 temp[4].x, src0.xyz, src0.xyz 6: src0.xyz = temp[4] REPL_ALPHA temp[5].x RSQ, |src0.x| 7: src0.xyz = temp[3], src1.xyz = temp[5] MAD temp[4].xyz, src0.xyz, src1.xxx, src0.000 8: src0.xyz = input[0] DP3 temp[3].x, src0.xyz, src0.xyz 9: src0.xyz = temp[3] REPL_ALPHA temp[6].x RSQ, |src0.x| 10: src0.xyz = input[0], src1.xyz = temp[6] MAD temp[7].xyz, src0.xyz, src1.xxx, src0.000 11: src0.xyz = temp[4], src1.xyz = temp[7] DP3 temp[8].x, src0.xyz, src1.xyz 12: src0.xyz = temp[8] MAX temp[4].x, src0.x__, src0.0__ 13: src0.xyz = const[3], src1.xyz = temp[4] MAD temp[9].xyz, src0.xyz, src1.xxx, src0.000 14: src0.xyz = temp[9], src1.xyz = const[0], src2.xyz = const[4] MAD temp[10].xyz, src0.xyz, src1.xyz, src2.xyz 15: src0.xyz = temp[0], src1.xyz = temp[10] MAD temp[11].xyz, src0.xyz, src1.xyz, src0.000 16: src0.xyz = temp[12], src1.xyz = const[2], src2.xyz = temp[11] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 17: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[2], temp[1].xy__, 2D[1]; 2: TEX temp[3].xyz, temp[1].xy__, 2D[2]; 3: TEX temp[6].xyz, temp[1].xy__, 2D[0]; 4: src0.xyz = temp[3], src0.w = temp[2], src1.xyz = const[1] MAD temp[3].xyz, src0.xyz, src0.111, -src0.HHH MAD temp[1].w, src0.w, src1.x, src0.0 5: src0.xyz = temp[3] DP3 temp[4].x, src0.xyz, src0.xyz 6: src0.xyz = temp[4] REPL_ALPHA temp[5].x RSQ, |src0.x| 7: src0.xyz = temp[3], src1.xyz = temp[5] MAD temp[4].xyz, src0.xyz, src1.xxx, src0.000 8: src0.xyz = temp[0] DP3 temp[3].x, src0.xyz, src0.xyz 9: src0.xyz = temp[3] REPL_ALPHA temp[3].x RSQ, |src0.x| 10: src0.xyz = temp[0], src1.xyz = temp[3] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 11: src0.xyz = temp[4], src1.xyz = temp[0] DP3 temp[0].x, src0.xyz, src1.xyz 12: src0.xyz = temp[0] MAX temp[4].x, src0.x__, src0.0__ 13: src0.xyz = const[3], src1.xyz = temp[4] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 14: src0.xyz = temp[0], src1.xyz = const[0], src2.xyz = const[4] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 15: src0.xyz = temp[2], src1.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 16: src0.xyz = temp[6], src1.xyz = const[2], src2.xyz = temp[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 17: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=18************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 13, tex_end: 2 (code_addr: 00440340) TEX: TEX t2, t1, texture[1] (00008881) TEX t3, t1, texture[2] (000090c1) TEX t6, t1, texture[0] (00008181) 0: xyz: t3 c1 t0 bias-> t3.xyz (038c0843) w: t2 t0 t0 bias-> t1.w (00840002) xyz: t3.xyz 1.0 -0.5 op: 000d8a80 w: t2.w c1.x 0.0 op: 00040189 1: xyz: t3 t0 t0 bias-> t4.x (00900003) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz t3.xyz t3.xxx op: 00804000 w: t3.x t3.x t3.x op: 00000000 2: xyz: t4 t0 t0 bias-> t5.x (00940004) w: t0 t0 t0 bias-> (00000000) xyz: t4.xxx t4.xxx t4.xxx op: 05004081 w: |t4.x| t4.x t4.x op: 05800040 3: xyz: t3 t5 t0 bias-> t4.xyz (03900143) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz t5.xxx 0.0 op: 00050280 w: t3.x t3.x t3.x op: 00000000 4: xyz: t0 t0 t0 bias-> t3.x (008c0000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t0.xyz t0.xxx op: 00804000 w: t0.x t0.x t0.x op: 00000000 5: xyz: t3 t0 t0 bias-> t3.x (008c0003) w: t0 t0 t0 bias-> (00000000) xyz: t3.xxx t3.xxx t3.xxx op: 05004081 w: |t3.x| t3.x t3.x op: 05800040 6: xyz: t0 t3 t0 bias-> t0.xyz (038000c0) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t3.xxx 0.0 op: 00050280 w: t0.x t0.x t0.x op: 00000000 7: xyz: t4 t0 t0 bias-> t0.x (00800004) w: t0 t0 t0 bias-> (00000000) xyz: t4.xyz t0.xyz t4.xxx op: 00804200 w: t4.x t4.x t4.x op: 00000000 8: xyz: t0 t0 t0 bias-> t4.x (00900000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz 0.0 t0.xxx op: 02804a00 w: t0.x t0.x t0.x op: 00000000 9: xyz: c3 t4 t0 bias-> t0.xyz (03800123) w: t0 t0 t0 bias-> (00000000) xyz: c3.xyz t4.xxx 0.0 op: 00050280 w: c3.x c3.x c3.x op: 00000000 10: xyz: t0 c0 c4 bias-> t0.xyz (03824800) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz c0.xyz c4.xyz op: 00020200 w: t0.x t0.x t0.x op: 00000000 11: xyz: t2 t0 t0 bias-> t0.xyz (03800002) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz t0.xyz 0.0 op: 00050200 w: t2.x t2.x t2.x op: 00000000 12: xyz: t6 c2 t0 bias-> t1.xyz (03840886) w: t0 t0 t0 bias-> (00000000) xyz: t6.xyz c2.xyz t0.xyz op: 00020200 w: t6.x t6.x t6.x op: 00000000 13: xyz: t1 t0 t0 bias-> o0.xyz (1c000001) w: t1 t0 t0 bias-> o0.w (01000001) xyz: t1.xyz 1.0 0.0 op: 00050a80 w: t1.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL OUT[2], GENERIC[11] DCL CONST[0..8] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[2].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: DP3 OUT[1].x, CONST[4].xyzz, IN[2].xyzz 5: DP3 OUT[1].y, CONST[4].xyzz, IN[3].xyzz 6: DP3 OUT[1].z, CONST[4].xyzz, IN[4].xyzz 7: MUL TEMP[0], CONST[5], IN[0].xxxx 8: MAD TEMP[1], CONST[6], IN[0].yyyy, TEMP[0] 9: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[1] 10: MAD OUT[0], CONST[8], IN[0].wwww, TEMP[0] 11: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[1].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[1].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[1].z, const[4].xyzz, input[4].xyzz; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[1].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[1].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[1].z, const[4].xyzz, input[4].xyzz; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[1].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[1].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[1].z, const[4].xyzz, input[4].xyzz; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[1].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[1].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[1].z, const[4].xyzz, input[4].xyzz; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[0], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[0]; 12: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[0], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[0]; 12: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00304204 dst: 2o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00102201 dst: 1o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110041 reg: 2i swiz: X/ Y/ Z/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 5: op: 0x00202201 dst: 1o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110061 reg: 3i swiz: X/ Y/ Z/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 6: op: 0x00402201 dst: 1o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110081 reg: 4i swiz: X/ Y/ Z/ 0 src2: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 7: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 8: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 9: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 10: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10102 reg: 8c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 11: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 12: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL CONST[0..4] DCL TEMP[0..4] 0: TEX TEMP[0], IN[0].xyyy, SAMP[3], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[0].xxxx 2: TEX TEMP[2].xyz, IN[0].xyyy, SAMP[0], 2D 3: TEX TEMP[3].xyz, IN[0].xyyy, SAMP[1], 2D 4: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[4].xyzz 5: MAD TEMP[3].xyz, TEMP[2].xyzz, CONST[3].xyzz, TEMP[4].xyzz 6: ADD TEMP[2].xyz, TEMP[0].xyzz, TEMP[3].xyzz 7: MUL TEMP[0].xyz, TEMP[2].xyzz, CONST[2].xyzz 8: TEX TEMP[2].xyz, IN[0].xyyy, SAMP[2], 2D 9: MAD TEMP[1].xyz, TEMP[2].xyzx, CONST[1].xyzx, TEMP[0].xyzx 10: MOV OUT[0], TEMP[1] 11: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[3]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[4].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[3].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[0].xyz, temp[2].xyzz, const[2].xyzz; 8: TEX temp[2].xyz, input[0].xyyy, 2D[2]; 9: MAD temp[1].xyz, temp[2].xyzx, const[1].xyzx, temp[0].xyzx; 10: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[3]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[4].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[3].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[0].xyz, temp[2].xyzz, const[2].xyzz; 8: TEX temp[2].xyz, input[0].xyyy, 2D[2]; 9: MAD temp[1].xyz, temp[2].xyzx, const[1].xyzx, temp[0].xyzx; 10: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[3]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[4].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[3].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[0].xyz, temp[2].xyzz, const[2].xyzz; 8: TEX temp[2].xyz, input[0].xyyy, 2D[2]; 9: MAD temp[1].xyz, temp[2].xyzx, const[1].xyzx, temp[0].xyzx; 10: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[3]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[4].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[3].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[0].xyz, temp[2].xyzz, const[2].xyzz; 8: TEX temp[2].xyz, input[0].xyyy, 2D[2]; 9: MAD temp[1].xyz, temp[2].xyzx, const[1].xyzx, temp[0].xyzx; 10: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[3]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[4].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[3].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[0].xyz, temp[2].xyzz, const[2].xyzz; 8: TEX temp[2].xyz, input[0].xyyy, 2D[2]; 9: MAD temp[1].xyz, temp[2].xyzx, const[1].xyzx, temp[0].xyzx; 10: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[3]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[5], input[0].xyyy, 2D[0]; 3: MOV temp[2].xyz, temp[5]; 4: TEX temp[6], input[0].xyyy, 2D[1]; 5: MOV temp[3].xyz, temp[6]; 6: MUL temp[4].xyz, temp[3].xyzz, const[4].xyzz; 7: MAD temp[3].xyz, temp[2].xyzz, const[3].xyzz, temp[4].xyzz; 8: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 9: MUL temp[0].xyz, temp[2].xyzz, const[2].xyzz; 10: TEX temp[7], input[0].xyyy, 2D[2]; 11: MOV temp[2].xyz, temp[7]; 12: MAD temp[1].xyz, temp[2].xyzx, const[1].xyzx, temp[0].xyzx; 13: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[3]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[5], input[0].xyyy, 2D[0]; 3: MOV temp[2].xyz, temp[5]; 4: TEX temp[6], input[0].xyyy, 2D[1]; 5: MOV temp[3].xyz, temp[6]; 6: MUL temp[4].xyz, temp[3].xyzz, const[4].xyzz; 7: MAD temp[3].xyz, temp[2].xyzz, const[3].xyzz, temp[4].xyzz; 8: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 9: MUL temp[0].xyz, temp[2].xyzz, const[2].xyzz; 10: TEX temp[7], input[0].xyyy, 2D[2]; 11: MOV temp[2].xyz, temp[7]; 12: MAD temp[1].xyz, temp[2].xyzx, const[1].xyzx, temp[0].xyzx; 13: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[3]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: MOV temp[2].xyz, temp[5].xyz_; 4: TEX temp[6].xyz, input[0].xy__, 2D[1]; 5: MOV temp[3].xyz, temp[6].xyz_; 6: MUL temp[4].xyz, temp[3].xyz_, const[4].xyz_; 7: MAD temp[3].xyz, temp[2].xyz_, const[3].xyz_, temp[4].xyz_; 8: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 9: MUL temp[0].xyz, temp[2].xyz_, const[2].xyz_; 10: TEX temp[7].xyz, input[0].xy__, 2D[2]; 11: MOV temp[2].xyz, temp[7].xyz_; 12: MAD temp[1].xyz, temp[2].xyz_, const[1].xyz_, temp[0].xyz_; 13: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[3]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: MOV temp[2].xyz, temp[5].xyz_; 4: TEX temp[6].xyz, input[0].xy__, 2D[1]; 5: MOV temp[3].xyz, temp[6].xyz_; 6: MUL temp[4].xyz, temp[3].xyz_, const[4].xyz_; 7: MAD temp[3].xyz, temp[2].xyz_, const[3].xyz_, temp[4].xyz_; 8: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 9: MUL temp[0].xyz, temp[2].xyz_, const[2].xyz_; 10: TEX temp[7].xyz, input[0].xy__, 2D[2]; 11: MOV temp[2].xyz, temp[7].xyz_; 12: MAD temp[1].xyz, temp[2].xyz_, const[1].xyz_, temp[0].xyz_; 13: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[3]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: TEX temp[6].xyz, input[0].xy__, 2D[1]; 4: MUL temp[4].xyz, temp[6].xyz_, const[4].xyz_; 5: MAD temp[3].xyz, temp[5].xyz_, const[3].xyz_, temp[4].xyz_; 6: MUL temp[0].xyz, (temp[3] + temp[0]).xyz_, const[2].xyz_; 7: TEX temp[7].xyz, input[0].xy__, 2D[2]; 8: MAD temp[1].xyz, temp[7].xyz_, const[1].xyz_, temp[0].xyz_; 9: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[3]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: TEX temp[6].xyz, input[0].xy__, 2D[1]; 4: MUL temp[4].xyz, temp[6].xyz_, const[4].xyz_; 5: MAD temp[3].xyz, temp[5].xyz_, const[3].xyz_, temp[4].xyz_; 6: MUL temp[0].xyz, (temp[3] + temp[0]).xyz_, const[2].xyz_; 7: TEX temp[7].xyz, input[0].xy__, 2D[2]; 8: MAD temp[1].xyz, temp[7].xyz_, const[1].xyz_, temp[0].xyz_; 9: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[3]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: TEX temp[6].xyz, input[0].xy__, 2D[1]; 4: MUL temp[4].xyz, temp[6].xyz_, const[4].xyz_; 5: MAD temp[3].xyz, temp[5].xyz_, const[3].xyz_, temp[4].xyz_; 6: MUL temp[0].xyz, (temp[3] + temp[0]).xyz_, const[2].xyz_; 7: TEX temp[7].xyz, input[0].xy__, 2D[2]; 8: MAD temp[1].xyz, temp[7].xyz_, const[1].xyz_, temp[0].xyz_; 9: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[3]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: TEX temp[3].xyz, input[0].xy__, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyz_, const[4].xyz_; 5: MAD temp[5].xyz, temp[2].xyz_, const[3].xyz_, temp[4].xyz_; 6: MUL temp[0].xyz, (temp[5] + temp[0]).xyz_, const[2].xyz_; 7: TEX temp[6].xyz, input[0].xy__, 2D[2]; 8: MAD temp[1].xyz, temp[6].xyz_, const[1].xyz_, temp[0].xyz_; 9: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[3]; 1: src0.xyz = const[0], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: TEX temp[3].xyz, input[0].xy__, 2D[1]; 4: src0.xyz = temp[3], src1.xyz = const[4] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 5: src0.xyz = temp[2], src1.xyz = const[3], src2.xyz = temp[4] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[0], src1.xyz = temp[5], src2.xyz = const[2], srcp.xyz = (src1 + src0) MAD temp[0].xyz, srcp.xyz, src2.xyz, src0.000 7: TEX temp[6].xyz, input[0].xy__, 2D[2]; 8: src0.xyz = temp[6], src1.xyz = const[1], src2.xyz = temp[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 9: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[3]; 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: TEX temp[3].xyz, input[0].xy__, 2D[1]; 4: TEX temp[6].xyz, input[0].xy__, 2D[2]; 5: src0.xyz = temp[3], src0.w = temp[0], src1.xyz = const[4], src2.xyz = const[0] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src2.x, src0.0 6: src0.xyz = temp[2], src1.xyz = const[3], src2.xyz = temp[4] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 7: src0.xyz = temp[0], src1.xyz = temp[5], src2.xyz = const[2], srcp.xyz = (src1 + src0) MAD temp[0].xyz, srcp.xyz, src2.xyz, src0.000 8: src0.xyz = temp[6], src1.xyz = const[1], src2.xyz = temp[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 9: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], temp[0].xy__, 2D[3]; 2: TEX temp[2].xyz, temp[0].xy__, 2D[0]; 3: TEX temp[3].xyz, temp[0].xy__, 2D[1]; 4: TEX temp[4].xyz, temp[0].xy__, 2D[2]; 5: src0.xyz = temp[3], src0.w = temp[1], src1.xyz = const[4], src2.xyz = const[0] MAD temp[3].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src2.x, src0.0 6: src0.xyz = temp[2], src1.xyz = const[3], src2.xyz = temp[3] MAD temp[2].xyz, src0.xyz, src1.xyz, src2.xyz 7: src0.xyz = temp[1], src1.xyz = temp[2], src2.xyz = const[2], srcp.xyz = (src1 + src0) MAD temp[1].xyz, srcp.xyz, src2.xyz, src0.000 8: src0.xyz = temp[4], src1.xyz = const[1], src2.xyz = temp[1] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 9: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=19************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 4, tex_end: 3 (code_addr: 00460100) TEX: TEX t1, t0, texture[3] (00009840) TEX t2, t0, texture[0] (00008080) TEX t3, t0, texture[1] (000088c0) TEX t4, t0, texture[2] (00009100) 0: xyz: t3 c4 c0 bias-> t3.xyz (038e0903) w: t1 t0 t0 bias-> t0.w (00800001) xyz: t3.xyz c4.xyz 0.0 op: 00050200 w: t1.w c0.x 0.0 op: 00040309 1: xyz: t2 c3 t3 bias-> t2.xyz (038838c2) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz c3.xyz t3.xyz op: 80020200 NOP w: t2.x t2.x t2.x op: 00000000 2: xyz: t1 t2 c2 add-> t1.xyz (03862081) w: t0 t0 t0 bias-> (00000000) xyz: srcp.xyz c2.xyz 0.0 op: 0045040f w: t1.x t1.x t1.x op: 00000000 3: xyz: t4 c1 t1 bias-> t0.xyz (03801844) w: t0 t0 t0 bias-> (00000000) xyz: t4.xyz c1.xyz t1.xyz op: 00020200 w: t4.x t4.x t4.x op: 00000000 4: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[1].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: MUL TEMP[0], CONST[4], IN[0].xxxx 5: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 6: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 7: MAD OUT[0], CONST[7], IN[0].wwww, TEMP[0] 8: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[0], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[0]; 9: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[0], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[0]; 9: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00302204 dst: 1o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 5: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 6: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 7: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 8: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 9: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[0..3] DCL TEMP[0..4] 0: TEX TEMP[0], IN[0].xyyy, SAMP[2], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[0].xxxx 2: TEX TEMP[2].xyz, IN[0].xyyy, SAMP[0], 2D 3: TEX TEMP[3].xyz, IN[0].xyyy, SAMP[1], 2D 4: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[3].xyzz 5: MAD TEMP[3].xyz, TEMP[2].xyzz, CONST[2].xyzz, TEMP[4].xyzz 6: ADD TEMP[2].xyz, TEMP[0].xyzz, TEMP[3].xyzz 7: MUL TEMP[1].xyz, TEMP[2].xyzx, CONST[1].xyzx 8: MOV OUT[0], TEMP[1] 9: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[3].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[1].xyz, temp[2].xyzx, const[1].xyzx; 8: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[3].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[1].xyz, temp[2].xyzx, const[1].xyzx; 8: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[3].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[1].xyz, temp[2].xyzx, const[1].xyzx; 8: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[3].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[1].xyz, temp[2].xyzx, const[1].xyzx; 8: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[2].xyz, input[0].xyyy, 2D[0]; 3: TEX temp[3].xyz, input[0].xyyy, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyzz, const[3].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: MUL temp[1].xyz, temp[2].xyzx, const[1].xyzx; 8: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[5], input[0].xyyy, 2D[0]; 3: MOV temp[2].xyz, temp[5]; 4: TEX temp[6], input[0].xyyy, 2D[1]; 5: MOV temp[3].xyz, temp[6]; 6: MUL temp[4].xyz, temp[3].xyzz, const[3].xyzz; 7: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, temp[4].xyzz; 8: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 9: MUL temp[1].xyz, temp[2].xyzx, const[1].xyzx; 10: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[0].xyyy, 2D[2]; 1: MUL temp[1].w, temp[0].xxxw, const[0].xxxx; 2: TEX temp[5], input[0].xyyy, 2D[0]; 3: MOV temp[2].xyz, temp[5]; 4: TEX temp[6], input[0].xyyy, 2D[1]; 5: MOV temp[3].xyz, temp[6]; 6: MUL temp[4].xyz, temp[3].xyzz, const[3].xyzz; 7: MAD temp[3].xyz, temp[2].xyzz, const[2].xyzz, temp[4].xyzz; 8: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 9: MUL temp[1].xyz, temp[2].xyzx, const[1].xyzx; 10: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: MOV temp[2].xyz, temp[5].xyz_; 4: TEX temp[6].xyz, input[0].xy__, 2D[1]; 5: MOV temp[3].xyz, temp[6].xyz_; 6: MUL temp[4].xyz, temp[3].xyz_, const[3].xyz_; 7: MAD temp[3].xyz, temp[2].xyz_, const[2].xyz_, temp[4].xyz_; 8: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 9: MUL temp[1].xyz, temp[2].xyz_, const[1].xyz_; 10: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: MOV temp[2].xyz, temp[5].xyz_; 4: TEX temp[6].xyz, input[0].xy__, 2D[1]; 5: MOV temp[3].xyz, temp[6].xyz_; 6: MUL temp[4].xyz, temp[3].xyz_, const[3].xyz_; 7: MAD temp[3].xyz, temp[2].xyz_, const[2].xyz_, temp[4].xyz_; 8: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 9: MUL temp[1].xyz, temp[2].xyz_, const[1].xyz_; 10: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: TEX temp[6].xyz, input[0].xy__, 2D[1]; 4: MUL temp[4].xyz, temp[6].xyz_, const[3].xyz_; 5: MAD temp[3].xyz, temp[5].xyz_, const[2].xyz_, temp[4].xyz_; 6: MUL temp[1].xyz, (temp[3] + temp[0]).xyz_, const[1].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: TEX temp[6].xyz, input[0].xy__, 2D[1]; 4: MUL temp[4].xyz, temp[6].xyz_, const[3].xyz_; 5: MAD temp[3].xyz, temp[5].xyz_, const[2].xyz_, temp[4].xyz_; 6: MUL temp[1].xyz, (temp[3] + temp[0]).xyz_, const[1].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: TEX temp[6].xyz, input[0].xy__, 2D[1]; 4: MUL temp[4].xyz, temp[6].xyz_, const[3].xyz_; 5: MAD temp[3].xyz, temp[5].xyz_, const[2].xyz_, temp[4].xyz_; 6: MUL temp[1].xyz, (temp[3] + temp[0]).xyz_, const[1].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[2]; 1: MUL temp[1].w, temp[0].___w, const[0].___x; 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: TEX temp[3].xyz, input[0].xy__, 2D[1]; 4: MUL temp[4].xyz, temp[3].xyz_, const[3].xyz_; 5: MAD temp[5].xyz, temp[2].xyz_, const[2].xyz_, temp[4].xyz_; 6: MUL temp[1].xyz, (temp[5] + temp[0]).xyz_, const[1].xyz_; 7: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[2]; 1: src0.xyz = const[0], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: TEX temp[3].xyz, input[0].xy__, 2D[1]; 4: src0.xyz = temp[3], src1.xyz = const[3] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 5: src0.xyz = temp[2], src1.xyz = const[2], src2.xyz = temp[4] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[0], src1.xyz = temp[5], src2.xyz = const[1], srcp.xyz = (src1 + src0) MAD temp[1].xyz, srcp.xyz, src2.xyz, src0.000 7: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[2]; 2: TEX temp[2].xyz, input[0].xy__, 2D[0]; 3: TEX temp[3].xyz, input[0].xy__, 2D[1]; 4: src0.xyz = temp[3], src0.w = temp[0], src1.xyz = const[3], src2.xyz = const[0] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src2.x, src0.0 5: src0.xyz = temp[2], src1.xyz = const[2], src2.xyz = temp[4] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[0], src1.xyz = temp[5], src2.xyz = const[1], srcp.xyz = (src1 + src0) MAD temp[1].xyz, srcp.xyz, src2.xyz, src0.000 7: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], temp[0].xy__, 2D[2]; 2: TEX temp[2].xyz, temp[0].xy__, 2D[0]; 3: TEX temp[0].xyz, temp[0].xy__, 2D[1]; 4: src0.xyz = temp[0], src0.w = temp[1], src1.xyz = const[3], src2.xyz = const[0] MAD temp[3].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src2.x, src0.0 5: src0.xyz = temp[2], src1.xyz = const[2], src2.xyz = temp[3] MAD temp[2].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[1], src1.xyz = temp[2], src2.xyz = const[1], srcp.xyz = (src1 + src0) MAD temp[0].xyz, srcp.xyz, src2.xyz, src0.000 7: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=20************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 3, tex_end: 2 (code_addr: 004400c0) TEX: TEX t1, t0, texture[2] (00009040) TEX t2, t0, texture[0] (00008080) TEX t0, t0, texture[1] (00008800) 0: xyz: t0 c3 c0 bias-> t3.xyz (038e08c0) w: t1 t0 t0 bias-> t0.w (00800001) xyz: t0.xyz c3.xyz 0.0 op: 00050200 w: t1.w c0.x 0.0 op: 00040309 1: xyz: t2 c2 t3 bias-> t2.xyz (03883882) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz c2.xyz t3.xyz op: 80020200 NOP w: t2.x t2.x t2.x op: 00000000 2: xyz: t1 t2 c1 add-> t0.xyz (03821081) w: t0 t0 t0 bias-> (00000000) xyz: srcp.xyz c1.xyz 0.0 op: 0045040f w: t1.x t1.x t1.x op: 00000000 3: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[1].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: MUL TEMP[0], CONST[4], IN[0].xxxx 5: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[0] 6: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[1] 7: MAD OUT[0], CONST[7], IN[0].wwww, TEMP[0] 8: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[1].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[2], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[2]; 9: MOV output[2], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[0], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[0]; 9: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[1].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: MUL temp[0], const[4], input[0].xxxx; 5: MAD temp[1], const[5], input[0].yyyy, temp[0]; 6: MAD temp[0], const[6], input[0].zzzz, temp[1]; 7: MAD temp[0], const[7], input[0].wwww, temp[0]; 8: MOV output[0], temp[0]; 9: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00302204 dst: 1o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 5: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 6: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 7: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 8: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 9: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL IN[1], GENERIC[11], PERSPECTIVE DCL IN[2], GENERIC[12], PERSPECTIVE DCL IN[3], GENERIC[13], PERSPECTIVE DCL IN[4], GENERIC[14], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL CONST[2..10] DCL TEMP[0..5] IMM FLT32 { -0.5000, 2.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[4].xyyy, SAMP[5], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[6].xxxx 2: TEX TEMP[2].xyz, IN[4].xyyy, SAMP[2], 2D 3: TEX TEMP[3].xyz, IN[4].xyyy, SAMP[3], 2D 4: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[10].xyzz 5: MAD TEMP[3].xyz, TEMP[2].xyzz, CONST[9].xyzz, TEMP[4].xyzz 6: ADD TEMP[2].xyz, TEMP[0].xyzz, TEMP[3].xyzz 7: TEX TEMP[0].xyz, IN[4].xyyy, SAMP[6], 2D 8: ADD TEMP[3].xyz, TEMP[0].xyzz, IMM[0].xxxy 9: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 10: RSQ TEMP[4].x, TEMP[0].xxxx 11: MUL TEMP[0].xyz, TEMP[3].xyzz, TEMP[4].xxxx 12: MOV TEMP[3].xyz, -IN[3].xyzx 13: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz 14: MUL TEMP[5].xyz, TEMP[4].xxxx, TEMP[0].xyzz 15: MUL TEMP[0].xyz, IMM[0].yyyy, TEMP[5].xyzz 16: ADD TEMP[4].xyz, TEMP[3].xyzz, -TEMP[0].xyzz 17: MUL TEMP[0].xyz, TEMP[4].xxxx, IN[2].xyzz 18: MAD TEMP[3].xyz, TEMP[4].yyyy, IN[1].xyzz, TEMP[0].xyzz 19: MAD TEMP[0].xyz, TEMP[4].zzzz, IN[0].xyzz, TEMP[3].xyzz 20: TEX TEMP[3].xyz, IN[4].xyyy, SAMP[1], 2D 21: MUL TEMP[4], CONST[2], TEMP[0].xxxx 22: MAD TEMP[5].xyz, CONST[3], TEMP[0].yyyy, TEMP[4] 23: MAD TEMP[4].xyz, CONST[4], TEMP[0].zzzz, TEMP[5] 24: TEX TEMP[0].xyz, TEMP[4].xyzz, SAMP[0], CUBE 25: MAD TEMP[4].xyz, TEMP[3].xyzz, TEMP[0].xyzz, TEMP[2].xyzz 26: MUL TEMP[0].xyz, TEMP[4].xyzz, CONST[8].xyzz 27: TEX TEMP[2].xyz, IN[4].xyyy, SAMP[4], 2D 28: MAD TEMP[1].xyz, TEMP[2].xyzx, CONST[7].xyzx, TEMP[0].xyzx 29: MOV OUT[0], TEMP[1] 30: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[4].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[6].xxxx; 2: TEX temp[2].xyz, input[4].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[4].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[10].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[9].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[4].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[11].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[0].xyz, const[11].yyyy, temp[5].xyzz; 16: ADD temp[4].xyz, temp[3].xyzz, -temp[0].xyzz; 17: MUL temp[0].xyz, temp[4].xxxx, input[2].xyzz; 18: MAD temp[3].xyz, temp[4].yyyy, input[1].xyzz, temp[0].xyzz; 19: MAD temp[0].xyz, temp[4].zzzz, input[0].xyzz, temp[3].xyzz; 20: TEX temp[3].xyz, input[4].xyyy, 2D[1]; 21: MUL temp[4], const[2], temp[0].xxxx; 22: MAD temp[5].xyz, const[3], temp[0].yyyy, temp[4]; 23: MAD temp[4].xyz, const[4], temp[0].zzzz, temp[5]; 24: TEX temp[0].xyz, temp[4].xyzz, CUBE[0]; 25: MAD temp[4].xyz, temp[3].xyzz, temp[0].xyzz, temp[2].xyzz; 26: MUL temp[0].xyz, temp[4].xyzz, const[8].xyzz; 27: TEX temp[2].xyz, input[4].xyyy, 2D[4]; 28: MAD temp[1].xyz, temp[2].xyzx, const[7].xyzx, temp[0].xyzx; 29: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[4].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[6].xxxx; 2: TEX temp[2].xyz, input[4].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[4].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[10].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[9].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[4].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[11].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[0].xyz, const[11].yyyy, temp[5].xyzz; 16: ADD temp[4].xyz, temp[3].xyzz, -temp[0].xyzz; 17: MUL temp[0].xyz, temp[4].xxxx, input[2].xyzz; 18: MAD temp[3].xyz, temp[4].yyyy, input[1].xyzz, temp[0].xyzz; 19: MAD temp[0].xyz, temp[4].zzzz, input[0].xyzz, temp[3].xyzz; 20: TEX temp[3].xyz, input[4].xyyy, 2D[1]; 21: MUL temp[4], const[2], temp[0].xxxx; 22: MAD temp[5].xyz, const[3], temp[0].yyyy, temp[4]; 23: MAD temp[4].xyz, const[4], temp[0].zzzz, temp[5]; 24: TEX temp[0].xyz, temp[4].xyzz, CUBE[0]; 25: MAD temp[4].xyz, temp[3].xyzz, temp[0].xyzz, temp[2].xyzz; 26: MUL temp[0].xyz, temp[4].xyzz, const[8].xyzz; 27: TEX temp[2].xyz, input[4].xyyy, 2D[4]; 28: MAD temp[1].xyz, temp[2].xyzx, const[7].xyzx, temp[0].xyzx; 29: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[4].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[6].xxxx; 2: TEX temp[2].xyz, input[4].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[4].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[10].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[9].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[4].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[11].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[0].xyz, const[11].yyyy, temp[5].xyzz; 16: ADD temp[4].xyz, temp[3].xyzz, -temp[0].xyzz; 17: MUL temp[0].xyz, temp[4].xxxx, input[2].xyzz; 18: MAD temp[3].xyz, temp[4].yyyy, input[1].xyzz, temp[0].xyzz; 19: MAD temp[0].xyz, temp[4].zzzz, input[0].xyzz, temp[3].xyzz; 20: TEX temp[3].xyz, input[4].xyyy, 2D[1]; 21: MUL temp[4], const[2], temp[0].xxxx; 22: MAD temp[5].xyz, const[3], temp[0].yyyy, temp[4]; 23: MAD temp[4].xyz, const[4], temp[0].zzzz, temp[5]; 24: TEX temp[0].xyz, temp[4].xyzz, CUBE[0]; 25: MAD temp[4].xyz, temp[3].xyzz, temp[0].xyzz, temp[2].xyzz; 26: MUL temp[0].xyz, temp[4].xyzz, const[8].xyzz; 27: TEX temp[2].xyz, input[4].xyyy, 2D[4]; 28: MAD temp[1].xyz, temp[2].xyzx, const[7].xyzx, temp[0].xyzx; 29: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[4].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[6].xxxx; 2: TEX temp[2].xyz, input[4].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[4].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[10].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[9].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[4].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[11].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[0].xyz, const[11].yyyy, temp[5].xyzz; 16: ADD temp[4].xyz, temp[3].xyzz, -temp[0].xyzz; 17: MUL temp[0].xyz, temp[4].xxxx, input[2].xyzz; 18: MAD temp[3].xyz, temp[4].yyyy, input[1].xyzz, temp[0].xyzz; 19: MAD temp[0].xyz, temp[4].zzzz, input[0].xyzz, temp[3].xyzz; 20: TEX temp[3].xyz, input[4].xyyy, 2D[1]; 21: MUL temp[4], const[2], temp[0].xxxx; 22: MAD temp[5].xyz, const[3], temp[0].yyyy, temp[4]; 23: MAD temp[4].xyz, const[4], temp[0].zzzz, temp[5]; 24: TEX temp[0].xyz, temp[4].xyzz, CUBE[0]; 25: MAD temp[4].xyz, temp[3].xyzz, temp[0].xyzz, temp[2].xyzz; 26: MUL temp[0].xyz, temp[4].xyzz, const[8].xyzz; 27: TEX temp[2].xyz, input[4].xyyy, 2D[4]; 28: MAD temp[1].xyz, temp[2].xyzx, const[7].xyzx, temp[0].xyzx; 29: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[4].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[6].xxxx; 2: TEX temp[2].xyz, input[4].xyyy, 2D[2]; 3: TEX temp[3].xyz, input[4].xyyy, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyzz, const[10].xyzz; 5: MAD temp[3].xyz, temp[2].xyzz, const[9].xyzz, temp[4].xyzz; 6: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 7: TEX temp[0].xyz, input[4].xyyy, 2D[6]; 8: ADD temp[3].xyz, temp[0].xyzz, const[11].xxxy; 9: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 10: RSQ temp[4].x, temp[0].xxxx; 11: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 12: MOV temp[3].xyz, -input[3].xyzx; 13: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 14: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 15: MUL temp[0].xyz, const[11].yyyy, temp[5].xyzz; 16: ADD temp[4].xyz, temp[3].xyzz, -temp[0].xyzz; 17: MUL temp[0].xyz, temp[4].xxxx, input[2].xyzz; 18: MAD temp[3].xyz, temp[4].yyyy, input[1].xyzz, temp[0].xyzz; 19: MAD temp[0].xyz, temp[4].zzzz, input[0].xyzz, temp[3].xyzz; 20: TEX temp[3].xyz, input[4].xyyy, 2D[1]; 21: MUL temp[4], const[2], temp[0].xxxx; 22: MAD temp[5].xyz, const[3], temp[0].yyyy, temp[4]; 23: MAD temp[4].xyz, const[4], temp[0].zzzz, temp[5]; 24: TEX temp[0].xyz, temp[4].xyzz, CUBE[0]; 25: MAD temp[4].xyz, temp[3].xyzz, temp[0].xyzz, temp[2].xyzz; 26: MUL temp[0].xyz, temp[4].xyzz, const[8].xyzz; 27: TEX temp[2].xyz, input[4].xyyy, 2D[4]; 28: MAD temp[1].xyz, temp[2].xyzx, const[7].xyzx, temp[0].xyzx; 29: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[4].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[6].xxxx; 2: TEX temp[6], input[4].xyyy, 2D[2]; 3: MOV temp[2].xyz, temp[6]; 4: TEX temp[7], input[4].xyyy, 2D[3]; 5: MOV temp[3].xyz, temp[7]; 6: MUL temp[4].xyz, temp[3].xyzz, const[10].xyzz; 7: MAD temp[3].xyz, temp[2].xyzz, const[9].xyzz, temp[4].xyzz; 8: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 9: TEX temp[8], input[4].xyyy, 2D[6]; 10: MOV temp[0].xyz, temp[8]; 11: ADD temp[3].xyz, temp[0].xyzz, const[11].xxxy; 12: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 13: RSQ temp[4].x, temp[0].xxxx; 14: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 15: MOV temp[3].xyz, -input[3].xyzx; 16: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 17: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 18: MUL temp[0].xyz, const[11].yyyy, temp[5].xyzz; 19: ADD temp[4].xyz, temp[3].xyzz, -temp[0].xyzz; 20: MUL temp[0].xyz, temp[4].xxxx, input[2].xyzz; 21: MAD temp[3].xyz, temp[4].yyyy, input[1].xyzz, temp[0].xyzz; 22: MAD temp[0].xyz, temp[4].zzzz, input[0].xyzz, temp[3].xyzz; 23: TEX temp[9], input[4].xyyy, 2D[1]; 24: MOV temp[3].xyz, temp[9]; 25: MUL temp[4], const[2], temp[0].xxxx; 26: MAD temp[5].xyz, const[3], temp[0].yyyy, temp[4]; 27: MAD temp[4].xyz, const[4], temp[0].zzzz, temp[5]; 28: TEX temp[10], temp[4].xyzz, CUBE[0]; 29: MOV temp[0].xyz, temp[10]; 30: MAD temp[4].xyz, temp[3].xyzz, temp[0].xyzz, temp[2].xyzz; 31: MUL temp[0].xyz, temp[4].xyzz, const[8].xyzz; 32: TEX temp[11], input[4].xyyy, 2D[4]; 33: MOV temp[2].xyz, temp[11]; 34: MAD temp[1].xyz, temp[2].xyzx, const[7].xyzx, temp[0].xyzx; 35: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[4].xyyy, 2D[5]; 1: MUL temp[1].w, temp[0].xxxw, const[6].xxxx; 2: TEX temp[6], input[4].xyyy, 2D[2]; 3: MOV temp[2].xyz, temp[6]; 4: TEX temp[7], input[4].xyyy, 2D[3]; 5: MOV temp[3].xyz, temp[7]; 6: MUL temp[4].xyz, temp[3].xyzz, const[10].xyzz; 7: MAD temp[3].xyz, temp[2].xyzz, const[9].xyzz, temp[4].xyzz; 8: ADD temp[2].xyz, temp[0].xyzz, temp[3].xyzz; 9: TEX temp[8], input[4].xyyy, 2D[6]; 10: MOV temp[0].xyz, temp[8]; 11: ADD temp[3].xyz, temp[0].xyzz, const[11].xxxy; 12: DP3 temp[0].x, temp[3].xyzz, temp[3].xyzz; 13: RSQ temp[4].x, |temp[0].xxxx|; 14: MUL temp[0].xyz, temp[3].xyzz, temp[4].xxxx; 15: MOV temp[3].xyz, -input[3].xyzx; 16: DP3 temp[4].x, temp[0].xyzz, temp[3].xyzz; 17: MUL temp[5].xyz, temp[4].xxxx, temp[0].xyzz; 18: MUL temp[0].xyz, const[11].yyyy, temp[5].xyzz; 19: ADD temp[4].xyz, temp[3].xyzz, -temp[0].xyzz; 20: MUL temp[0].xyz, temp[4].xxxx, input[2].xyzz; 21: MAD temp[3].xyz, temp[4].yyyy, input[1].xyzz, temp[0].xyzz; 22: MAD temp[0].xyz, temp[4].zzzz, input[0].xyzz, temp[3].xyzz; 23: TEX temp[9], input[4].xyyy, 2D[1]; 24: MOV temp[3].xyz, temp[9]; 25: MUL temp[4], const[2], temp[0].xxxx; 26: MAD temp[5].xyz, const[3], temp[0].yyyy, temp[4]; 27: MAD temp[4].xyz, const[4], temp[0].zzzz, temp[5]; 28: TEX temp[10], temp[4].xyzz, CUBE[0]; 29: MOV temp[0].xyz, temp[10]; 30: MAD temp[4].xyz, temp[3].xyzz, temp[0].xyzz, temp[2].xyzz; 31: MUL temp[0].xyz, temp[4].xyzz, const[8].xyzz; 32: TEX temp[11], input[4].xyyy, 2D[4]; 33: MOV temp[2].xyz, temp[11]; 34: MAD temp[1].xyz, temp[2].xyzx, const[7].xyzx, temp[0].xyzx; 35: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[4].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[6].___x; 2: TEX temp[6].xyz, input[4].xy__, 2D[2]; 3: MOV temp[2].xyz, temp[6].xyz_; 4: TEX temp[7].xyz, input[4].xy__, 2D[3]; 5: MOV temp[3].xyz, temp[7].xyz_; 6: MUL temp[4].xyz, temp[3].xyz_, const[10].xyz_; 7: MAD temp[3].xyz, temp[2].xyz_, const[9].xyz_, temp[4].xyz_; 8: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 9: TEX temp[8].xyz, input[4].xy__, 2D[6]; 10: MOV temp[0].xyz, temp[8].xyz_; 11: ADD temp[3].xyz, temp[0].xyz_, const[11].xxx_; 12: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 13: RSQ temp[4].x, |temp[0].x___|; 14: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 15: MOV temp[3].xyz, -input[3].xyz_; 16: DP3 temp[4].x, temp[0].xyz_, temp[3].xyz_; 17: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 18: MUL temp[0].xyz, const[11].yyy_, temp[5].xyz_; 19: ADD temp[4].xyz, temp[3].xyz_, -temp[0].xyz_; 20: MUL temp[0].xyz, temp[4].xxx_, input[2].xyz_; 21: MAD temp[3].xyz, temp[4].yyy_, input[1].xyz_, temp[0].xyz_; 22: MAD temp[0].xyz, temp[4].zzz_, input[0].xyz_, temp[3].xyz_; 23: TEX temp[9].xyz, input[4].xy__, 2D[1]; 24: MOV temp[3].xyz, temp[9].xyz_; 25: MUL temp[4].xyz, const[2].xyz_, temp[0].xxx_; 26: MAD temp[5].xyz, const[3].xyz_, temp[0].yyy_, temp[4].xyz_; 27: MAD temp[4].xyz, const[4].xyz_, temp[0].zzz_, temp[5].xyz_; 28: TEX temp[10].xyz, temp[4].xyz_, CUBE[0]; 29: MOV temp[0].xyz, temp[10].xyz_; 30: MAD temp[4].xyz, temp[3].xyz_, temp[0].xyz_, temp[2].xyz_; 31: MUL temp[0].xyz, temp[4].xyz_, const[8].xyz_; 32: TEX temp[11].xyz, input[4].xy__, 2D[4]; 33: MOV temp[2].xyz, temp[11].xyz_; 34: MAD temp[1].xyz, temp[2].xyz_, const[7].xyz_, temp[0].xyz_; 35: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[4].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[6].___x; 2: TEX temp[6].xyz, input[4].xy__, 2D[2]; 3: MOV temp[2].xyz, temp[6].xyz_; 4: TEX temp[7].xyz, input[4].xy__, 2D[3]; 5: MOV temp[3].xyz, temp[7].xyz_; 6: MUL temp[4].xyz, temp[3].xyz_, const[10].xyz_; 7: MAD temp[3].xyz, temp[2].xyz_, const[9].xyz_, temp[4].xyz_; 8: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 9: TEX temp[8].xyz, input[4].xy__, 2D[6]; 10: MOV temp[0].xyz, temp[8].xyz_; 11: ADD temp[3].xyz, temp[0].xyz_, const[11].xxx_; 12: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 13: RSQ temp[4].x, |temp[0].x___|; 14: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 15: MOV temp[3].xyz, -input[3].xyz_; 16: DP3 temp[4].x, temp[0].xyz_, temp[3].xyz_; 17: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 18: MUL temp[0].xyz, const[11].yyy_, temp[5].xyz_; 19: ADD temp[4].xyz, temp[3].xyz_, -temp[0].xyz_; 20: MUL temp[0].xyz, temp[4].xxx_, input[2].xyz_; 21: MAD temp[3].xyz, temp[4].yyy_, input[1].xyz_, temp[0].xyz_; 22: MAD temp[0].xyz, temp[4].zzz_, input[0].xyz_, temp[3].xyz_; 23: TEX temp[9].xyz, input[4].xy__, 2D[1]; 24: MOV temp[3].xyz, temp[9].xyz_; 25: MUL temp[4].xyz, const[2].xyz_, temp[0].xxx_; 26: MAD temp[5].xyz, const[3].xyz_, temp[0].yyy_, temp[4].xyz_; 27: MAD temp[4].xyz, const[4].xyz_, temp[0].zzz_, temp[5].xyz_; 28: TEX temp[10].xyz, temp[4].xyz_, CUBE[0]; 29: MOV temp[0].xyz, temp[10].xyz_; 30: MAD temp[4].xyz, temp[3].xyz_, temp[0].xyz_, temp[2].xyz_; 31: MUL temp[0].xyz, temp[4].xyz_, const[8].xyz_; 32: TEX temp[11].xyz, input[4].xy__, 2D[4]; 33: MOV temp[2].xyz, temp[11].xyz_; 34: MAD temp[1].xyz, temp[2].xyz_, const[7].xyz_, temp[0].xyz_; 35: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[4].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[6].___x; 2: TEX temp[6].xyz, input[4].xy__, 2D[2]; 3: TEX temp[7].xyz, input[4].xy__, 2D[3]; 4: MUL temp[4].xyz, temp[7].xyz_, const[10].xyz_; 5: MAD temp[3].xyz, temp[6].xyz_, const[9].xyz_, temp[4].xyz_; 6: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 7: TEX temp[8].xyz, input[4].xy__, 2D[6]; 8: ADD temp[3].xyz, temp[8].xyz_, none.-H-H-H_; 9: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 10: RSQ temp[4].x, |temp[0].x___|; 11: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 12: DP3 temp[4].x, temp[0].xyz_, input[3].-x-y-z_; 13: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 14: MUL temp[0].xyz, const[11].yyy_, temp[5].xyz_; 15: ADD temp[4].xyz, input[3].-x-y-z_, -temp[0].xyz_; 16: MUL temp[0].xyz, temp[4].xxx_, input[2].xyz_; 17: MAD temp[3].xyz, temp[4].yyy_, input[1].xyz_, temp[0].xyz_; 18: MAD temp[0].xyz, temp[4].zzz_, input[0].xyz_, temp[3].xyz_; 19: TEX temp[9].xyz, input[4].xy__, 2D[1]; 20: MUL temp[4].xyz, const[2].xyz_, temp[0].xxx_; 21: MAD temp[5].xyz, const[3].xyz_, temp[0].yyy_, temp[4].xyz_; 22: MAD temp[4].xyz, const[4].xyz_, temp[0].zzz_, temp[5].xyz_; 23: TEX temp[10].xyz, temp[4].xyz_, CUBE[0]; 24: MAD temp[4].xyz, temp[9].xyz_, temp[10].xyz_, temp[2].xyz_; 25: MUL temp[0].xyz, temp[4].xyz_, const[8].xyz_; 26: TEX temp[11].xyz, input[4].xy__, 2D[4]; 27: MAD temp[1].xyz, temp[11].xyz_, const[7].xyz_, temp[0].xyz_; 28: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[4].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[6].___x; 2: TEX temp[6].xyz, input[4].xy__, 2D[2]; 3: TEX temp[7].xyz, input[4].xy__, 2D[3]; 4: MUL temp[4].xyz, temp[7].xyz_, const[10].xyz_; 5: MAD temp[3].xyz, temp[6].xyz_, const[9].xyz_, temp[4].xyz_; 6: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 7: TEX temp[8].xyz, input[4].xy__, 2D[6]; 8: ADD temp[3].xyz, temp[8].xyz_, none.-H-H-H_; 9: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 10: RSQ temp[4].x, |temp[0].x___|; 11: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 12: DP3 temp[4].x, temp[0].xyz_, input[3].-x-y-z_; 13: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 14: MUL temp[0].xyz, const[11].yyy_, temp[5].xyz_; 15: ADD temp[4].xyz, input[3].-x-y-z_, -temp[0].xyz_; 16: MUL temp[0].xyz, temp[4].xxx_, input[2].xyz_; 17: MAD temp[3].xyz, temp[4].yyy_, input[1].xyz_, temp[0].xyz_; 18: MAD temp[0].xyz, temp[4].zzz_, input[0].xyz_, temp[3].xyz_; 19: TEX temp[9].xyz, input[4].xy__, 2D[1]; 20: MUL temp[4].xyz, const[2].xyz_, temp[0].xxx_; 21: MAD temp[5].xyz, const[3].xyz_, temp[0].yyy_, temp[4].xyz_; 22: MAD temp[4].xyz, const[4].xyz_, temp[0].zzz_, temp[5].xyz_; 23: TEX temp[10].xyz, temp[4].xyz_, CUBE[0]; 24: MAD temp[4].xyz, temp[9].xyz_, temp[10].xyz_, temp[2].xyz_; 25: MUL temp[0].xyz, temp[4].xyz_, const[8].xyz_; 26: TEX temp[11].xyz, input[4].xy__, 2D[4]; 27: MAD temp[1].xyz, temp[11].xyz_, const[7].xyz_, temp[0].xyz_; 28: MOV output[0], temp[1]; CONST[8] = { -0.5000 2.0000 0.0000 0.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[4].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[3].___x; 2: TEX temp[6].xyz, input[4].xy__, 2D[2]; 3: TEX temp[7].xyz, input[4].xy__, 2D[3]; 4: MUL temp[4].xyz, temp[7].xyz_, const[7].xyz_; 5: MAD temp[3].xyz, temp[6].xyz_, const[6].xyz_, temp[4].xyz_; 6: ADD temp[2].xyz, temp[0].xyz_, temp[3].xyz_; 7: TEX temp[8].xyz, input[4].xy__, 2D[6]; 8: ADD temp[3].xyz, temp[8].xyz_, none.-H-H-H_; 9: DP3 temp[0].x, temp[3].xyz_, temp[3].xyz_; 10: RSQ temp[4].x, |temp[0].x___|; 11: MUL temp[0].xyz, temp[3].xyz_, temp[4].xxx_; 12: DP3 temp[4].x, temp[0].xyz_, input[3].-x-y-z_; 13: MUL temp[5].xyz, temp[4].xxx_, temp[0].xyz_; 14: MUL temp[0].xyz, const[8].yyy_, temp[5].xyz_; 15: ADD temp[4].xyz, input[3].-x-y-z_, -temp[0].xyz_; 16: MUL temp[0].xyz, temp[4].xxx_, input[2].xyz_; 17: MAD temp[3].xyz, temp[4].yyy_, input[1].xyz_, temp[0].xyz_; 18: MAD temp[0].xyz, temp[4].zzz_, input[0].xyz_, temp[3].xyz_; 19: TEX temp[9].xyz, input[4].xy__, 2D[1]; 20: MUL temp[4].xyz, const[0].xyz_, temp[0].xxx_; 21: MAD temp[5].xyz, const[1].xyz_, temp[0].yyy_, temp[4].xyz_; 22: MAD temp[4].xyz, const[2].xyz_, temp[0].zzz_, temp[5].xyz_; 23: TEX temp[10].xyz, temp[4].xyz_, CUBE[0]; 24: MAD temp[4].xyz, temp[9].xyz_, temp[10].xyz_, temp[2].xyz_; 25: MUL temp[0].xyz, temp[4].xyz_, const[5].xyz_; 26: TEX temp[11].xyz, input[4].xy__, 2D[4]; 27: MAD temp[1].xyz, temp[11].xyz_, const[4].xyz_, temp[0].xyz_; 28: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[4].xy__, 2D[5]; 1: MUL temp[1].w, temp[0].___w, const[3].___x; 2: TEX temp[2].xyz, input[4].xy__, 2D[2]; 3: TEX temp[3].xyz, input[4].xy__, 2D[3]; 4: MUL temp[4].xyz, temp[3].xyz_, const[7].xyz_; 5: MAD temp[5].xyz, temp[2].xyz_, const[6].xyz_, temp[4].xyz_; 6: ADD temp[6].xyz, temp[0].xyz_, temp[5].xyz_; 7: TEX temp[7].xyz, input[4].xy__, 2D[6]; 8: ADD temp[8].xyz, temp[7].xyz_, none.-H-H-H_; 9: DP3 temp[0].x, temp[8].xyz_, temp[8].xyz_; 10: RSQ temp[4].x, |temp[0].x___|; 11: MUL temp[0].xyz, temp[8].xyz_, temp[4].xxx_; 12: DP3 temp[4].x, temp[0].xyz_, input[3].-x-y-z_; 13: MUL temp[9].xyz, temp[4].xxx_, temp[0].xyz_; 14: MUL temp[0].xyz, const[8].yyy_, temp[9].xyz_; 15: ADD temp[10].xyz, input[3].-x-y-z_, -temp[0].xyz_; 16: MUL temp[0].xyz, temp[10].xxx_, input[2].xyz_; 17: MAD temp[11].xyz, temp[10].yyy_, input[1].xyz_, temp[0].xyz_; 18: MAD temp[0].xyz, temp[10].zzz_, input[0].xyz_, temp[11].xyz_; 19: TEX temp[12].xyz, input[4].xy__, 2D[1]; 20: MUL temp[13].xyz, const[0].xyz_, temp[0].xxx_; 21: MAD temp[14].xyz, const[1].xyz_, temp[0].yyy_, temp[13].xyz_; 22: MAD temp[15].xyz, const[2].xyz_, temp[0].zzz_, temp[14].xyz_; 23: TEX temp[16].xyz, temp[15].xyz_, CUBE[0]; 24: MAD temp[17].xyz, temp[12].xyz_, temp[16].xyz_, temp[6].xyz_; 25: MUL temp[0].xyz, temp[17].xyz_, const[5].xyz_; 26: TEX temp[18].xyz, input[4].xy__, 2D[4]; 27: MAD temp[1].xyz, temp[18].xyz_, const[4].xyz_, temp[0].xyz_; 28: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[4].xy__, 2D[5]; 1: src0.xyz = const[3], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: TEX temp[2].xyz, input[4].xy__, 2D[2]; 3: TEX temp[3].xyz, input[4].xy__, 2D[3]; 4: src0.xyz = temp[3], src1.xyz = const[7] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 5: src0.xyz = temp[2], src1.xyz = const[6], src2.xyz = temp[4] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 6: src0.xyz = temp[0], src1.xyz = temp[5] MAD temp[6].xyz, src0.xyz, src0.111, src1.xyz 7: TEX temp[7].xyz, input[4].xy__, 2D[6]; 8: src0.xyz = temp[7] MAD temp[8].xyz, src0.xyz, src0.111, -src0.HHH 9: src0.xyz = temp[8] DP3 temp[0].x, src0.xyz, src0.xyz 10: src0.xyz = temp[0] REPL_ALPHA temp[4].x RSQ, |src0.x| 11: src0.xyz = temp[8], src1.xyz = temp[4] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 12: src0.xyz = temp[0], src1.xyz = input[3] DP3 temp[4].x, src0.xyz, -src1.xyz 13: src0.xyz = temp[4], src1.xyz = temp[0] MAD temp[9].xyz, src0.xxx, src1.xyz, src0.000 14: src0.xyz = const[8], src1.xyz = temp[9] MAD temp[0].xyz, src0.yyy, src1.xyz, src0.000 15: src0.xyz = input[3], src1.xyz = temp[0] MAD temp[10].xyz, -src0.xyz, src0.111, -src1.xyz 16: src0.xyz = temp[10], src1.xyz = input[2] MAD temp[0].xyz, src0.xxx, src1.xyz, src0.000 17: src0.xyz = temp[10], src1.xyz = input[1], src2.xyz = temp[0] MAD temp[11].xyz, src0.yyy, src1.xyz, src2.xyz 18: src0.xyz = temp[10], src1.xyz = input[0], src2.xyz = temp[11] MAD temp[0].xyz, src0.zzz, src1.xyz, src2.xyz 19: TEX temp[12].xyz, input[4].xy__, 2D[1]; 20: src0.xyz = const[0], src1.xyz = temp[0] MAD temp[13].xyz, src0.xyz, src1.xxx, src0.000 21: src0.xyz = const[1], src1.xyz = temp[0], src2.xyz = temp[13] MAD temp[14].xyz, src0.xyz, src1.yyy, src2.xyz 22: src0.xyz = const[2], src1.xyz = temp[0], src2.xyz = temp[14] MAD temp[15].xyz, src0.xyz, src1.zzz, src2.xyz 23: TEX temp[16].xyz, temp[15].xyz_, CUBE[0]; 24: src0.xyz = temp[12], src1.xyz = temp[16], src2.xyz = temp[6] MAD temp[17].xyz, src0.xyz, src1.xyz, src2.xyz 25: src0.xyz = temp[17], src1.xyz = const[5] MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 26: TEX temp[18].xyz, input[4].xy__, 2D[4]; 27: src0.xyz = temp[18], src1.xyz = const[4], src2.xyz = temp[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 28: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[4].xy__, 2D[5]; 2: TEX temp[2].xyz, input[4].xy__, 2D[2]; 3: TEX temp[3].xyz, input[4].xy__, 2D[3]; 4: TEX temp[7].xyz, input[4].xy__, 2D[6]; 5: TEX temp[12].xyz, input[4].xy__, 2D[1]; 6: TEX temp[18].xyz, input[4].xy__, 2D[4]; 7: src0.xyz = temp[7], src0.w = temp[0], src1.xyz = const[3] MAD temp[8].xyz, src0.xyz, src0.111, -src0.HHH MAD temp[1].w, src0.w, src1.x, src0.0 8: src0.xyz = temp[3], src1.xyz = const[7] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 9: src0.xyz = temp[2], src1.xyz = const[6], src2.xyz = temp[4] MAD temp[5].xyz, src0.xyz, src1.xyz, src2.xyz 10: src0.xyz = temp[0], src1.xyz = temp[5] MAD temp[6].xyz, src0.xyz, src0.111, src1.xyz 11: src0.xyz = temp[8] DP3 temp[0].x, src0.xyz, src0.xyz 12: src0.xyz = temp[0] REPL_ALPHA temp[4].x RSQ, |src0.x| 13: src0.xyz = temp[8], src1.xyz = temp[4] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 14: src0.xyz = temp[0], src1.xyz = input[3] DP3 temp[4].x, src0.xyz, -src1.xyz 15: src0.xyz = temp[4], src1.xyz = temp[0] MAD temp[9].xyz, src0.xxx, src1.xyz, src0.000 16: src0.xyz = const[8], src1.xyz = temp[9] MAD temp[0].xyz, src0.yyy, src1.xyz, src0.000 17: src0.xyz = input[3], src1.xyz = temp[0] MAD temp[10].xyz, -src0.xyz, src0.111, -src1.xyz 18: src0.xyz = temp[10], src1.xyz = input[2] MAD temp[0].xyz, src0.xxx, src1.xyz, src0.000 19: src0.xyz = temp[10], src1.xyz = input[1], src2.xyz = temp[0] MAD temp[11].xyz, src0.yyy, src1.xyz, src2.xyz 20: src0.xyz = temp[10], src1.xyz = input[0], src2.xyz = temp[11] MAD temp[0].xyz, src0.zzz, src1.xyz, src2.xyz 21: src0.xyz = const[0], src1.xyz = temp[0] MAD temp[13].xyz, src0.xyz, src1.xxx, src0.000 22: src0.xyz = const[1], src1.xyz = temp[0], src2.xyz = temp[13] MAD temp[14].xyz, src0.xyz, src1.yyy, src2.xyz 23: src0.xyz = const[2], src1.xyz = temp[0], src2.xyz = temp[14] MAD temp[15].xyz, src0.xyz, src1.zzz, src2.xyz 24: BEGIN_TEX; 25: TEX temp[16].xyz, temp[15].xyz_, CUBE[0]; 26: src0.xyz = temp[12], src1.xyz = temp[16], src2.xyz = temp[6] MAD temp[17].xyz, src0.xyz, src1.xyz, src2.xyz 27: src0.xyz = temp[17], src1.xyz = const[5] MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 28: src0.xyz = temp[18], src1.xyz = const[4], src2.xyz = temp[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src2.xyz 29: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[5], temp[4].xy__, 2D[5]; 2: TEX temp[6].xyz, temp[4].xy__, 2D[2]; 3: TEX temp[7].xyz, temp[4].xy__, 2D[3]; 4: TEX temp[8].xyz, temp[4].xy__, 2D[6]; 5: TEX temp[9].xyz, temp[4].xy__, 2D[1]; 6: TEX temp[10].xyz, temp[4].xy__, 2D[4]; 7: src0.xyz = temp[8], src0.w = temp[5], src1.xyz = const[3] MAD temp[8].xyz, src0.xyz, src0.111, -src0.HHH MAD temp[4].w, src0.w, src1.x, src0.0 8: src0.xyz = temp[7], src1.xyz = const[7] MAD temp[7].xyz, src0.xyz, src1.xyz, src0.000 9: src0.xyz = temp[6], src1.xyz = const[6], src2.xyz = temp[7] MAD temp[6].xyz, src0.xyz, src1.xyz, src2.xyz 10: src0.xyz = temp[5], src1.xyz = temp[6] MAD temp[6].xyz, src0.xyz, src0.111, src1.xyz 11: src0.xyz = temp[8] DP3 temp[5].x, src0.xyz, src0.xyz 12: src0.xyz = temp[5] REPL_ALPHA temp[7].x RSQ, |src0.x| 13: src0.xyz = temp[8], src1.xyz = temp[7] MAD temp[5].xyz, src0.xyz, src1.xxx, src0.000 14: src0.xyz = temp[5], src1.xyz = temp[3] DP3 temp[7].x, src0.xyz, -src1.xyz 15: src0.xyz = temp[7], src1.xyz = temp[5] MAD temp[7].xyz, src0.xxx, src1.xyz, src0.000 16: src0.xyz = const[8], src1.xyz = temp[7] MAD temp[5].xyz, src0.yyy, src1.xyz, src0.000 17: src0.xyz = temp[3], src1.xyz = temp[5] MAD temp[3].xyz, -src0.xyz, src0.111, -src1.xyz 18: src0.xyz = temp[3], src1.xyz = temp[2] MAD temp[5].xyz, src0.xxx, src1.xyz, src0.000 19: src0.xyz = temp[3], src1.xyz = temp[1], src2.xyz = temp[5] MAD temp[1].xyz, src0.yyy, src1.xyz, src2.xyz 20: src0.xyz = temp[3], src1.xyz = temp[0], src2.xyz = temp[1] MAD temp[5].xyz, src0.zzz, src1.xyz, src2.xyz 21: src0.xyz = const[0], src1.xyz = temp[5] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 22: src0.xyz = const[1], src1.xyz = temp[5], src2.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.yyy, src2.xyz 23: src0.xyz = const[2], src1.xyz = temp[5], src2.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.zzz, src2.xyz 24: BEGIN_TEX; 25: TEX temp[0].xyz, temp[0].xyz_, CUBE[0]; 26: src0.xyz = temp[9], src1.xyz = temp[0], src2.xyz = temp[6] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 27: src0.xyz = temp[0], src1.xyz = const[5] MAD temp[5].xyz, src0.xyz, src1.xyz, src0.000 28: src0.xyz = temp[10], src1.xyz = const[4], src2.xyz = temp[5] MAD temp[4].xyz, src0.xyz, src1.xyz, src2.xyz 29: src0.xyz = temp[4], src0.w = temp[4] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=21************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 16, tex_end: 5 (code_addr: 000a0400) TEX: TEX t5, t4, texture[5] (0000a944) TEX t6, t4, texture[2] (00009184) TEX t7, t4, texture[3] (000099c4) TEX t8, t4, texture[6] (0000b204) TEX t9, t4, texture[1] (00008a44) TEX t10, t4, texture[4] (0000a284) 0: xyz: t8 c3 t0 bias-> t8.xyz (03a008c8) w: t5 t0 t0 bias-> t4.w (00900005) xyz: t8.xyz 1.0 -0.5 op: 000d8a80 w: t5.w c3.x 0.0 op: 00040189 1: xyz: t7 c7 t0 bias-> t7.xyz (039c09c7) w: t0 t0 t0 bias-> (00000000) xyz: t7.xyz c7.xyz 0.0 op: 00050200 w: t7.x t7.x t7.x op: 00000000 2: xyz: t6 c6 t7 bias-> t6.xyz (03987986) w: t0 t0 t0 bias-> (00000000) xyz: t6.xyz c6.xyz t7.xyz op: 00020200 w: t6.x t6.x t6.x op: 00000000 3: xyz: t5 t6 t0 bias-> t6.xyz (03980185) w: t0 t0 t0 bias-> (00000000) xyz: t5.xyz 1.0 t6.xyz op: 00010a80 w: t5.x t5.x t5.x op: 00000000 4: xyz: t8 t0 t0 bias-> t5.x (00940008) w: t0 t0 t0 bias-> (00000000) xyz: t8.xyz t8.xyz t8.xxx op: 00804000 w: t8.x t8.x t8.x op: 00000000 5: xyz: t5 t0 t0 bias-> t7.x (009c0005) w: t0 t0 t0 bias-> (00000000) xyz: t5.xxx t5.xxx t5.xxx op: 05004081 w: |t5.x| t5.x t5.x op: 05800040 6: xyz: t8 t7 t0 bias-> t5.xyz (039401c8) w: t0 t0 t0 bias-> (00000000) xyz: t8.xyz t7.xxx 0.0 op: 00050280 w: t8.x t8.x t8.x op: 00000000 7: xyz: t5 t3 t0 bias-> t7.x (009c00c5) w: t0 t0 t0 bias-> (00000000) xyz: t5.xyz -t3.xyz t5.xxx op: 00805200 w: t5.x t5.x t5.x op: 00000000 8: xyz: t7 t5 t0 bias-> t7.xyz (039c0147) w: t0 t0 t0 bias-> (00000000) xyz: t7.xxx t5.xyz 0.0 op: 00050201 w: t7.x t7.x t7.x op: 00000000 9: xyz: c8 t7 t0 bias-> t5.xyz (039401e8) w: t0 t0 t0 bias-> (00000000) xyz: c8.yyy t7.xyz 0.0 op: 00050202 w: c8.x c8.x c8.x op: 00000000 10: xyz: t3 t5 t0 bias-> t3.xyz (038c0143) w: t0 t0 t0 bias-> (00000000) xyz: -t3.xyz 1.0 -t5.xyz op: 00090aa0 w: t3.x t3.x t3.x op: 00000000 11: xyz: t3 t2 t0 bias-> t5.xyz (03940083) w: t0 t0 t0 bias-> (00000000) xyz: t3.xxx t2.xyz 0.0 op: 00050201 w: t3.x t3.x t3.x op: 00000000 12: xyz: t3 t1 t5 bias-> t1.xyz (03845043) w: t0 t0 t0 bias-> (00000000) xyz: t3.yyy t1.xyz t5.xyz op: 00020202 w: t3.x t3.x t3.x op: 00000000 13: xyz: t3 t0 t1 bias-> t5.xyz (03941003) w: t0 t0 t0 bias-> (00000000) xyz: t3.zzz t0.xyz t1.xyz op: 00020203 w: t3.x t3.x t3.x op: 00000000 14: xyz: c0 t5 t0 bias-> t0.xyz (03800160) w: t0 t0 t0 bias-> (00000000) xyz: c0.xyz t5.xxx 0.0 op: 00050280 w: c0.x c0.x c0.x op: 00000000 15: xyz: c1 t5 t0 bias-> t0.xyz (03800161) w: t0 t0 t0 bias-> (00000000) xyz: c1.xyz t5.yyy t0.xyz op: 00020300 w: c1.x c1.x c1.x op: 00000000 16: xyz: c2 t5 t0 bias-> t0.xyz (03800162) w: t0 t0 t0 bias-> (00000000) xyz: c2.xyz t5.zzz t0.xyz op: 00020380 w: c2.x c2.x c2.x op: 00000000 NODE 1: alu_offset: 17, tex_offset: 6, alu_end: 3, tex_end: 0 (code_addr: 004060d1) TEX: TEX t0, t0, texture[0] (00008000) 17: xyz: t9 t0 t6 bias-> t0.xyz (03806009) w: t0 t0 t0 bias-> (00000000) xyz: t9.xyz t0.xyz t6.xyz op: 00020200 w: t9.x t9.x t9.x op: 00000000 18: xyz: t0 c5 t0 bias-> t5.xyz (03940940) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz c5.xyz 0.0 op: 00050200 w: t0.x t0.x t0.x op: 00000000 19: xyz: t10 c4 t5 bias-> t4.xyz (0390590a) w: t0 t0 t0 bias-> (00000000) xyz: t10.xyz c4.xyz t5.xyz op: 00020200 w: t10.x t10.x t10.x op: 00000000 20: xyz: t4 t0 t0 bias-> o0.xyz (1c000004) w: t4 t0 t0 bias-> o0.w (01000004) xyz: t4.xyz 1.0 0.0 op: 00050a80 w: t4.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL OUT[2], GENERIC[11] DCL OUT[3], GENERIC[12] DCL OUT[4], GENERIC[13] DCL OUT[5], GENERIC[14] DCL CONST[0..8] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[5].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: ADD TEMP[0].xyz, CONST[4].xyzz, -IN[0].xyzz 5: DP3 OUT[4].x, TEMP[0].xyzz, IN[2].xyzz 6: DP3 OUT[4].y, TEMP[0].xyzz, IN[3].xyzz 7: DP3 OUT[4].z, TEMP[0].xyzz, IN[4].xyzz 8: MOV OUT[3].xyz, IN[2].xyzx 9: MOV OUT[2].xyz, IN[3].xyzx 10: MOV OUT[1].xyz, IN[4].xyzx 11: MUL TEMP[0], CONST[5], IN[0].xxxx 12: MAD TEMP[1], CONST[6], IN[0].yyyy, TEMP[0] 13: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[1] 14: MAD OUT[0], CONST[8], IN[0].wwww, TEMP[0] 15: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[5].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: ADD temp[0].xyz, const[4].xyzz, -input[0].xyzz; 5: DP3 output[4].x, temp[0].xyzz, input[2].xyzz; 6: DP3 output[4].y, temp[0].xyzz, input[3].xyzz; 7: DP3 output[4].z, temp[0].xyzz, input[4].xyzz; 8: MOV output[3].xyz, input[2].xyzx; 9: MOV output[2].xyz, input[3].xyzx; 10: MOV output[1].xyz, input[4].xyzx; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[5].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: ADD temp[0].xyz, const[4].xyzz, -input[0].xyzz; 5: DP3 output[4].x, temp[0].xyzz, input[2].xyzz; 6: DP3 output[4].y, temp[0].xyzz, input[3].xyzz; 7: DP3 output[4].z, temp[0].xyzz, input[4].xyzz; 8: MOV output[3].xyz, input[2].xyzx; 9: MOV output[2].xyz, input[3].xyzx; 10: MOV output[1].xyz, input[4].xyzx; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[5].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: ADD temp[0].xyz, const[4].xyzz, -input[0].xyzz; 5: DP3 output[4].x, temp[0].xyzz, input[2].xyzz; 6: DP3 output[4].y, temp[0].xyzz, input[3].xyzz; 7: DP3 output[4].z, temp[0].xyzz, input[4].xyzz; 8: MOV output[3].xyz, input[2].xyzx; 9: MOV output[2].xyz, input[3].xyzx; 10: MOV output[1].xyz, input[4].xyzx; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[5].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: ADD temp[0].xyz, const[4].xyzz, -input[0].xyzz; 5: DP3 output[4].x, temp[0].xyzz, input[2].xyzz; 6: DP3 output[4].y, temp[0].xyzz, input[3].xyzz; 7: DP3 output[4].z, temp[0].xyzz, input[4].xyzz; 8: MOV output[3].xyz, input[2].xyzx; 9: MOV output[2].xyz, input[3].xyzx; 10: MOV output[1].xyz, input[4].xyzx; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[5].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: ADD temp[0].xyz, const[4].xyzz, -input[0].xyzz; 5: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 6: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 7: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 8: MOV output[3].xyz, input[2].xyzx; 9: MOV output[2].xyz, input[3].xyzx; 10: MOV output[1].xyz, input[4].xyzx; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[5].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: ADD temp[0].xyz, const[4].xyzz, -input[0].xyzz; 5: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 6: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 7: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 8: MOV output[3].xyz, input[2].xyzx; 9: MOV output[2].xyz, input[3].xyzx; 10: MOV output[1].xyz, input[4].xyzx; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[5].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: ADD temp[0].xyz, const[4].xyz_, -input[0].xyz_; 5: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 6: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 7: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 8: MOV output[3].xyz, input[2].xyz_; 9: MOV output[2].xyz, input[3].xyz_; 10: MOV output[1].xyz, input[4].xyz_; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[5].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: ADD temp[0].xyz, const[4].xyz_, -input[0].xyz_; 5: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 6: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 7: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 8: MOV output[3].xyz, input[2].xyz_; 9: MOV output[2].xyz, input[3].xyz_; 10: MOV output[1].xyz, input[4].xyz_; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[5].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: ADD temp[0].xyz, const[4].xyz_, -input[0].xyz_; 5: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 6: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 7: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 8: MOV output[3].xyz, input[2].xyz_; 9: MOV output[2].xyz, input[3].xyz_; 10: MOV output[1].xyz, input[4].xyz_; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[5].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: ADD temp[0].xyz, const[4].xyz_, -input[0].xyz_; 5: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 6: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 7: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 8: MOV output[3].xyz, input[2].xyz_; 9: MOV output[2].xyz, input[3].xyz_; 10: MOV output[1].xyz, input[4].xyz_; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[2], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[2]; 16: MOV output[6], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[5].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: ADD temp[0].xyz, const[4].xyz_, -input[0].xyz_; 5: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 6: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 7: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 8: MOV output[3].xyz, input[2].xyz_; 9: MOV output[2].xyz, input[3].xyz_; 10: MOV output[1].xyz, input[4].xyz_; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[0], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[0]; 16: MOV output[6], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[5].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: ADD temp[0].xyz, const[4].xyz_, -input[0].xyz_; 5: DP4 output[4].x, temp[0].xyz0, input[2].xyz0; 6: DP4 output[4].y, temp[0].xyz0, input[3].xyz0; 7: DP4 output[4].z, temp[0].xyz0, input[4].xyz0; 8: MOV output[3].xyz, input[2].xyz_; 9: MOV output[2].xyz, input[3].xyz_; 10: MOV output[1].xyz, input[4].xyz_; 11: MUL temp[0], const[5], input[0].xxxx; 12: MAD temp[1], const[6], input[0].yyyy, temp[0]; 13: MAD temp[0], const[7], input[0].zzzz, temp[1]; 14: MAD temp[0], const[8], input[0].wwww, temp[0]; 15: MOV output[0], temp[0]; 16: MOV output[6], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x0030a204 dst: 5o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00700003 dst: 0t op: VE_ADD src0: 0x01d10082 reg: 4c swiz: X/ Y/ Z/ U src1: 0x1fd10001 reg: 0i swiz: -X/-Y/-Z/-U src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 5: op: 0x00108201 dst: 4o op: VE_DOT_PRODUCT src0: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src1: 0x01110041 reg: 2i swiz: X/ Y/ Z/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 6: op: 0x00208201 dst: 4o op: VE_DOT_PRODUCT src0: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src1: 0x01110061 reg: 3i swiz: X/ Y/ Z/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 7: op: 0x00408201 dst: 4o op: VE_DOT_PRODUCT src0: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src1: 0x01110081 reg: 4i swiz: X/ Y/ Z/ 0 src2: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 8: op: 0x00706203 dst: 3o op: VE_ADD src0: 0x01d10041 reg: 2i swiz: X/ Y/ Z/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 9: op: 0x00704203 dst: 2o op: VE_ADD src0: 0x01d10061 reg: 3i swiz: X/ Y/ Z/ U src1: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 10: op: 0x00702203 dst: 1o op: VE_ADD src0: 0x01d10081 reg: 4i swiz: X/ Y/ Z/ U src1: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 src2: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 11: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 12: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 13: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 14: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10102 reg: 8c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 15: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 16: op: 0x00f0c203 dst: 6o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[10], PERSPECTIVE DCL IN[1], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..4] IMM FLT32 { -0.5000, 0.0000, 0.0000, 0.0000} 0: TEX TEMP[0], IN[1].xyyy, SAMP[0], 2D 1: MUL TEMP[1].w, TEMP[0].xxxw, CONST[1].xxxx 2: TEX TEMP[2].xyz, IN[1].xyyy, SAMP[1], 2D 3: ADD TEMP[3].xyz, TEMP[2].xyzz, IMM[0].xxxy 4: DP3 TEMP[2].x, TEMP[3].xyzz, TEMP[3].xyzz 5: RSQ TEMP[4].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, TEMP[3].xyzz, TEMP[4].xxxx 7: DP3 TEMP[3].x, IN[0].xyzz, IN[0].xyzz 8: RSQ TEMP[4].x, TEMP[3].xxxx 9: MUL TEMP[3].xyz, IN[0].xyzz, TEMP[4].xxxx 10: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[3].xyzz 11: MAX TEMP[2].x, TEMP[4].xxxx, IMM[0].yyyy 12: MUL TEMP[3].xyz, CONST[2].xyzz, TEMP[2].xxxx 13: MAD TEMP[2].xyz, TEMP[3].xyzz, CONST[0].xyzz, CONST[3].xyzz 14: MUL TEMP[1].xyz, TEMP[0].xyzx, TEMP[2].xyzx 15: MOV OUT[0], TEMP[1] 16: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[1]; 3: ADD temp[3].xyz, temp[2].xyzz, const[4].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[4].yyyy; 12: MUL temp[3].xyz, const[2].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[3].xyzz; 14: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 15: MOV output[0], temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[1]; 3: ADD temp[3].xyz, temp[2].xyzz, const[4].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[4].yyyy; 12: MUL temp[3].xyz, const[2].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[3].xyzz; 14: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 15: MOV output[0], temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[1]; 3: ADD temp[3].xyz, temp[2].xyzz, const[4].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[4].yyyy; 12: MUL temp[3].xyz, const[2].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[3].xyzz; 14: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 15: MOV output[0], temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[1]; 3: ADD temp[3].xyz, temp[2].xyzz, const[4].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[4].yyyy; 12: MUL temp[3].xyz, const[2].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[3].xyzz; 14: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 15: MOV output[0], temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[2].xyz, input[1].xyyy, 2D[1]; 3: ADD temp[3].xyz, temp[2].xyzz, const[4].xxxy; 4: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 5: RSQ temp[4].x, temp[2].xxxx; 6: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 7: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 8: RSQ temp[4].x, temp[3].xxxx; 9: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 10: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 11: MAX temp[2].x, temp[4].xxxx, const[4].yyyy; 12: MUL temp[3].xyz, const[2].xyzz, temp[2].xxxx; 13: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[3].xyzz; 14: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 15: MOV output[0], temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[5], input[1].xyyy, 2D[1]; 3: MOV temp[2].xyz, temp[5]; 4: ADD temp[3].xyz, temp[2].xyzz, const[4].xxxy; 5: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 6: RSQ temp[4].x, temp[2].xxxx; 7: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 8: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 9: RSQ temp[4].x, temp[3].xxxx; 10: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 11: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 12: MAX temp[2].x, temp[4].xxxx, const[4].yyyy; 13: MUL temp[3].xyz, const[2].xyzz, temp[2].xxxx; 14: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[3].xyzz; 15: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 16: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1].xyyy, 2D[0]; 1: MUL temp[1].w, temp[0].xxxw, const[1].xxxx; 2: TEX temp[5], input[1].xyyy, 2D[1]; 3: MOV temp[2].xyz, temp[5]; 4: ADD temp[3].xyz, temp[2].xyzz, const[4].xxxy; 5: DP3 temp[2].x, temp[3].xyzz, temp[3].xyzz; 6: RSQ temp[4].x, |temp[2].xxxx|; 7: MUL temp[2].xyz, temp[3].xyzz, temp[4].xxxx; 8: DP3 temp[3].x, input[0].xyzz, input[0].xyzz; 9: RSQ temp[4].x, |temp[3].xxxx|; 10: MUL temp[3].xyz, input[0].xyzz, temp[4].xxxx; 11: DP3 temp[4].x, temp[2].xyzz, temp[3].xyzz; 12: MAX temp[2].x, temp[4].xxxx, const[4].yyyy; 13: MUL temp[3].xyz, const[2].xyzz, temp[2].xxxx; 14: MAD temp[2].xyz, temp[3].xyzz, const[0].xyzz, const[3].xyzz; 15: MUL temp[1].xyz, temp[0].xyzx, temp[2].xyzx; 16: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[1]; 3: MOV temp[2].xyz, temp[5].xyz_; 4: ADD temp[3].xyz, temp[2].xyz_, const[4].xxx_; 5: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 6: RSQ temp[4].x, |temp[2].x___|; 7: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 8: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 9: RSQ temp[4].x, |temp[3].x___|; 10: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 11: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 12: MAX temp[2].x, temp[4].x___, const[4].y___; 13: MUL temp[3].xyz, const[2].xyz_, temp[2].xxx_; 14: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[3].xyz_; 15: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 16: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[1]; 3: MOV temp[2].xyz, temp[5].xyz_; 4: ADD temp[3].xyz, temp[2].xyz_, const[4].xxx_; 5: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 6: RSQ temp[4].x, |temp[2].x___|; 7: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 8: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 9: RSQ temp[4].x, |temp[3].x___|; 10: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 11: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 12: MAX temp[2].x, temp[4].x___, const[4].y___; 13: MUL temp[3].xyz, const[2].xyz_, temp[2].xxx_; 14: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[3].xyz_; 15: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 16: MOV output[0], temp[1]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[1]; 3: ADD temp[3].xyz, temp[5].xyz_, none.-H-H-H_; 4: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 5: RSQ temp[4].x, |temp[2].x___|; 6: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 7: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 8: RSQ temp[4].x, |temp[3].x___|; 9: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 10: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 11: MAX temp[2].x, temp[4].x___, none.0___; 12: MUL temp[3].xyz, const[2].xyz_, temp[2].xxx_; 13: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[3].xyz_; 14: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 15: MOV output[0], temp[1]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[1]; 3: ADD temp[3].xyz, temp[5].xyz_, none.-H-H-H_; 4: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 5: RSQ temp[4].x, |temp[2].x___|; 6: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 7: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 8: RSQ temp[4].x, |temp[3].x___|; 9: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 10: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 11: MAX temp[2].x, temp[4].x___, none.0___; 12: MUL temp[3].xyz, const[2].xyz_, temp[2].xxx_; 13: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[3].xyz_; 14: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 15: MOV output[0], temp[1]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[5].xyz, input[1].xy__, 2D[1]; 3: ADD temp[3].xyz, temp[5].xyz_, none.-H-H-H_; 4: DP3 temp[2].x, temp[3].xyz_, temp[3].xyz_; 5: RSQ temp[4].x, |temp[2].x___|; 6: MUL temp[2].xyz, temp[3].xyz_, temp[4].xxx_; 7: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 8: RSQ temp[4].x, |temp[3].x___|; 9: MUL temp[3].xyz, input[0].xyz_, temp[4].xxx_; 10: DP3 temp[4].x, temp[2].xyz_, temp[3].xyz_; 11: MAX temp[2].x, temp[4].x___, none.0___; 12: MUL temp[3].xyz, const[2].xyz_, temp[2].xxx_; 13: MAD temp[2].xyz, temp[3].xyz_, const[0].xyz_, const[3].xyz_; 14: MUL temp[1].xyz, temp[0].xyz_, temp[2].xyz_; 15: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL temp[1].w, temp[0].___w, const[1].___x; 2: TEX temp[2].xyz, input[1].xy__, 2D[1]; 3: ADD temp[3].xyz, temp[2].xyz_, none.-H-H-H_; 4: DP3 temp[4].x, temp[3].xyz_, temp[3].xyz_; 5: RSQ temp[5].x, |temp[4].x___|; 6: MUL temp[4].xyz, temp[3].xyz_, temp[5].xxx_; 7: DP3 temp[3].x, input[0].xyz_, input[0].xyz_; 8: RSQ temp[6].x, |temp[3].x___|; 9: MUL temp[7].xyz, input[0].xyz_, temp[6].xxx_; 10: DP3 temp[8].x, temp[4].xyz_, temp[7].xyz_; 11: MAX temp[4].x, temp[8].x___, none.0___; 12: MUL temp[9].xyz, const[2].xyz_, temp[4].xxx_; 13: MAD temp[10].xyz, temp[9].xyz_, const[0].xyz_, const[3].xyz_; 14: MUL temp[1].xyz, temp[0].xyz_, temp[10].xyz_; 15: MOV output[0], temp[1]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: src0.xyz = const[1], src0.w = temp[0] MAD temp[1].w, src0.w, src0.x, src0.0 2: TEX temp[2].xyz, input[1].xy__, 2D[1]; 3: src0.xyz = temp[2] MAD temp[3].xyz, src0.xyz, src0.111, -src0.HHH 4: src0.xyz = temp[3] DP3 temp[4].x, src0.xyz, src0.xyz 5: src0.xyz = temp[4] REPL_ALPHA temp[5].x RSQ, |src0.x| 6: src0.xyz = temp[3], src1.xyz = temp[5] MAD temp[4].xyz, src0.xyz, src1.xxx, src0.000 7: src0.xyz = input[0] DP3 temp[3].x, src0.xyz, src0.xyz 8: src0.xyz = temp[3] REPL_ALPHA temp[6].x RSQ, |src0.x| 9: src0.xyz = input[0], src1.xyz = temp[6] MAD temp[7].xyz, src0.xyz, src1.xxx, src0.000 10: src0.xyz = temp[4], src1.xyz = temp[7] DP3 temp[8].x, src0.xyz, src1.xyz 11: src0.xyz = temp[8] MAX temp[4].x, src0.x__, src0.0__ 12: src0.xyz = const[2], src1.xyz = temp[4] MAD temp[9].xyz, src0.xyz, src1.xxx, src0.000 13: src0.xyz = temp[9], src1.xyz = const[0], src2.xyz = const[3] MAD temp[10].xyz, src0.xyz, src1.xyz, src2.xyz 14: src0.xyz = temp[0], src1.xyz = temp[10] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 15: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[1].xy__, 2D[0]; 2: TEX temp[2].xyz, input[1].xy__, 2D[1]; 3: src0.xyz = temp[2], src0.w = temp[0], src1.xyz = const[1] MAD temp[3].xyz, src0.xyz, src0.111, -src0.HHH MAD temp[1].w, src0.w, src1.x, src0.0 4: src0.xyz = temp[3] DP3 temp[4].x, src0.xyz, src0.xyz 5: src0.xyz = temp[4] REPL_ALPHA temp[5].x RSQ, |src0.x| 6: src0.xyz = temp[3], src1.xyz = temp[5] MAD temp[4].xyz, src0.xyz, src1.xxx, src0.000 7: src0.xyz = input[0] DP3 temp[3].x, src0.xyz, src0.xyz 8: src0.xyz = temp[3] REPL_ALPHA temp[6].x RSQ, |src0.x| 9: src0.xyz = input[0], src1.xyz = temp[6] MAD temp[7].xyz, src0.xyz, src1.xxx, src0.000 10: src0.xyz = temp[4], src1.xyz = temp[7] DP3 temp[8].x, src0.xyz, src1.xyz 11: src0.xyz = temp[8] MAX temp[4].x, src0.x__, src0.0__ 12: src0.xyz = const[2], src1.xyz = temp[4] MAD temp[9].xyz, src0.xyz, src1.xxx, src0.000 13: src0.xyz = temp[9], src1.xyz = const[0], src2.xyz = const[3] MAD temp[10].xyz, src0.xyz, src1.xyz, src2.xyz 14: src0.xyz = temp[0], src1.xyz = temp[10] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 15: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[2], temp[1].xy__, 2D[0]; 2: TEX temp[1].xyz, temp[1].xy__, 2D[1]; 3: src0.xyz = temp[1], src0.w = temp[2], src1.xyz = const[1] MAD temp[3].xyz, src0.xyz, src0.111, -src0.HHH MAD temp[1].w, src0.w, src1.x, src0.0 4: src0.xyz = temp[3] DP3 temp[4].x, src0.xyz, src0.xyz 5: src0.xyz = temp[4] REPL_ALPHA temp[5].x RSQ, |src0.x| 6: src0.xyz = temp[3], src1.xyz = temp[5] MAD temp[4].xyz, src0.xyz, src1.xxx, src0.000 7: src0.xyz = temp[0] DP3 temp[3].x, src0.xyz, src0.xyz 8: src0.xyz = temp[3] REPL_ALPHA temp[3].x RSQ, |src0.x| 9: src0.xyz = temp[0], src1.xyz = temp[3] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 10: src0.xyz = temp[4], src1.xyz = temp[0] DP3 temp[0].x, src0.xyz, src1.xyz 11: src0.xyz = temp[0] MAX temp[4].x, src0.x__, src0.0__ 12: src0.xyz = const[2], src1.xyz = temp[4] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 13: src0.xyz = temp[0], src1.xyz = const[0], src2.xyz = const[3] MAD temp[0].xyz, src0.xyz, src1.xyz, src2.xyz 14: src0.xyz = temp[2], src1.xyz = temp[0] MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 15: src0.xyz = temp[1], src0.w = temp[1] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=22************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 12, tex_end: 1 (code_addr: 00420300) TEX: TEX t2, t1, texture[0] (00008081) TEX t1, t1, texture[1] (00008841) 0: xyz: t1 c1 t0 bias-> t3.xyz (038c0841) w: t2 t0 t0 bias-> t1.w (00840002) xyz: t1.xyz 1.0 -0.5 op: 000d8a80 w: t2.w c1.x 0.0 op: 00040189 1: xyz: t3 t0 t0 bias-> t4.x (00900003) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz t3.xyz t3.xxx op: 00804000 w: t3.x t3.x t3.x op: 00000000 2: xyz: t4 t0 t0 bias-> t5.x (00940004) w: t0 t0 t0 bias-> (00000000) xyz: t4.xxx t4.xxx t4.xxx op: 05004081 w: |t4.x| t4.x t4.x op: 05800040 3: xyz: t3 t5 t0 bias-> t4.xyz (03900143) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz t5.xxx 0.0 op: 00050280 w: t3.x t3.x t3.x op: 00000000 4: xyz: t0 t0 t0 bias-> t3.x (008c0000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t0.xyz t0.xxx op: 00804000 w: t0.x t0.x t0.x op: 00000000 5: xyz: t3 t0 t0 bias-> t3.x (008c0003) w: t0 t0 t0 bias-> (00000000) xyz: t3.xxx t3.xxx t3.xxx op: 05004081 w: |t3.x| t3.x t3.x op: 05800040 6: xyz: t0 t3 t0 bias-> t0.xyz (038000c0) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t3.xxx 0.0 op: 00050280 w: t0.x t0.x t0.x op: 00000000 7: xyz: t4 t0 t0 bias-> t0.x (00800004) w: t0 t0 t0 bias-> (00000000) xyz: t4.xyz t0.xyz t4.xxx op: 00804200 w: t4.x t4.x t4.x op: 00000000 8: xyz: t0 t0 t0 bias-> t4.x (00900000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz 0.0 t0.xxx op: 02804a00 w: t0.x t0.x t0.x op: 00000000 9: xyz: c2 t4 t0 bias-> t0.xyz (03800122) w: t0 t0 t0 bias-> (00000000) xyz: c2.xyz t4.xxx 0.0 op: 00050280 w: c2.x c2.x c2.x op: 00000000 10: xyz: t0 c0 c3 bias-> t0.xyz (03823800) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz c0.xyz c3.xyz op: 00020200 w: t0.x t0.x t0.x op: 00000000 11: xyz: t2 t0 t0 bias-> t1.xyz (03840002) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz t0.xyz 0.0 op: 00050200 w: t2.x t2.x t2.x op: 00000000 12: xyz: t1 t0 t0 bias-> o0.xyz (1c000001) w: t1 t0 t0 bias-> o0.w (01000001) xyz: t1.xyz 1.0 0.0 op: 00050a80 w: t1.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[10] DCL OUT[2], GENERIC[11] DCL CONST[0..8] DCL TEMP[0..1] 0: MUL TEMP[0], CONST[0], IN[1].xxxx 1: MAD TEMP[1], CONST[1], IN[1].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[1].zzzz, TEMP[1] 3: MAD OUT[2].xy, CONST[3].xyxx, IN[1].wwxx, TEMP[0].xyxx 4: DP3 OUT[1].x, CONST[4].xyzz, IN[2].xyzz 5: DP3 OUT[1].y, CONST[4].xyzz, IN[3].xyzz 6: DP3 OUT[1].z, CONST[4].xyzz, IN[4].xyzz 7: MUL TEMP[0], CONST[5], IN[0].xxxx 8: MAD TEMP[1], CONST[6], IN[0].yyyy, TEMP[0] 9: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[1] 10: MAD OUT[0], CONST[8], IN[0].wwww, TEMP[0] 11: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[1].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[1].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[1].z, const[4].xyzz, input[4].xyzz; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[1].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[1].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[1].z, const[4].xyzz, input[4].xyzz; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[1].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[1].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[1].z, const[4].xyzz, input[4].xyzz; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP3 output[1].x, const[4].xyzz, input[2].xyzz; 5: DP3 output[1].y, const[4].xyzz, input[3].xyzz; 6: DP3 output[1].z, const[4].xyzz, input[4].xyzz; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], const[0], input[1].xxxx; 1: MAD temp[1], const[1], input[1].yyyy, temp[0]; 2: MAD temp[0], const[2], input[1].zzzz, temp[1]; 3: MAD output[2].xy, const[3].xyxx, input[1].wwxx, temp[0].xyxx; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[2], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[2]; 12: MOV output[3], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[0], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[0]; 12: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0].xy, const[0].xy__, input[1].xx__; 1: MAD temp[1].xy, const[1].xy__, input[1].yy__, temp[0].xy__; 2: MAD temp[0].xy, const[2].xy__, input[1].zz__, temp[1].xy__; 3: MAD output[2].xy, const[3].xy__, input[1].ww__, temp[0].xy__; 4: DP4 output[1].x, const[4].xyz0, input[2].xyz0; 5: DP4 output[1].y, const[4].xyz0, input[3].xyz0; 6: DP4 output[1].z, const[4].xyz0, input[4].xyz0; 7: MUL temp[0], const[5], input[0].xxxx; 8: MAD temp[1], const[6], input[0].yyyy, temp[0]; 9: MAD temp[0], const[7], input[0].zzzz, temp[1]; 10: MAD temp[0], const[8], input[0].wwww, temp[0]; 11: MOV output[0], temp[0]; 12: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00300002 dst: 0t op: VE_MULTIPLY src0: 0x01f90002 reg: 0c swiz: X/ Y/ U/ U src1: 0x01f80021 reg: 1i swiz: X/ X/ U/ U src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00302004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01f90022 reg: 1c swiz: X/ Y/ U/ U src1: 0x01f92021 reg: 1i swiz: Y/ Y/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 2: op: 0x00300004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01f90042 reg: 2c swiz: X/ Y/ U/ U src1: 0x01fa4021 reg: 1i swiz: Z/ Z/ U/ U src2: 0x01f90020 reg: 1t swiz: X/ Y/ U/ U 3: op: 0x00304204 dst: 2o op: VE_MULTIPLY_ADD src0: 0x01f90062 reg: 3c swiz: X/ Y/ U/ U src1: 0x01fb6021 reg: 1i swiz: W/ W/ U/ U src2: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U 4: op: 0x00102201 dst: 1o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110041 reg: 2i swiz: X/ Y/ Z/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 5: op: 0x00202201 dst: 1o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110061 reg: 3i swiz: X/ Y/ Z/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 6: op: 0x00402201 dst: 1o op: VE_DOT_PRODUCT src0: 0x01110082 reg: 4c swiz: X/ Y/ Z/ 0 src1: 0x01110081 reg: 4i swiz: X/ Y/ Z/ 0 src2: 0x01248081 reg: 4i swiz: 0/ 0/ 0/ 0 7: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src1: 0x00000001 reg: 0i swiz: X/ X/ X/ X src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 8: op: 0x00f02004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src1: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 9: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src1: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src2: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W 10: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00d10102 reg: 8c swiz: X/ Y/ Z/ W src1: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 11: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 12: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000