r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[0], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[0], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00000001 reg: 0i swiz: X/ X/ X/ X src1: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 1: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src1: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 2: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 3: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 4: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 5: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 6: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=0************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial fragment program FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[1], input[0], 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[1], input[0], 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[0], src0.w = temp[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[0], src0.w = temp[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[0], src0.w = temp[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=1************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t0, t0, texture[0] (00008000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], COLOR, PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=2************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: MOV OUT[2], IN[2] 6: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[1]; 7: MOV output[3], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[0], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[0], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[2], input[2]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00000001 reg: 0i swiz: X/ X/ X/ X src1: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 1: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src1: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 2: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 3: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 4: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 5: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10041 reg: 2i swiz: X/ Y/ Z/ W src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 6: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 7: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0] 0: TXP TEMP[0], IN[1].xyyw, SAMP[0], 2D 1: MUL TEMP[0], TEMP[0], IN[0] 2: MOV OUT[0], TEMP[0] 3: END Fragment Program: before compilation # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TXP temp[0], input[1].xy_w, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TXP temp[0], input[1].xy_w, 2D[0]; 1: MUL temp[0], temp[0], input[0]; 2: MOV output[0], temp[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TXP temp[1], input[1].xy_w, 2D[0]; 1: MUL temp[2], temp[1], input[0]; 2: MOV output[0], temp[2]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TXP temp[1], input[1].xy_w, 2D[0]; 1: MUL temp[2], temp[1], input[0]; 2: MOV output[0], temp[2]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TXP temp[1], input[1].xy_w, 2D[0]; 1: MUL temp[2], temp[1], input[0]; 2: MOV output[0], temp[2]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TXP temp[1], input[1].xy_w, 2D[0]; 1: MUL temp[2], temp[1], input[0]; 2: MOV output[0], temp[2]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TXP temp[1], input[1].xy_w, 2D[0]; 1: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] MAD temp[2].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[2].w, src0.w, src1.w, src0.0 2: src0.xyz = temp[2], src0.w = temp[2] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TXP temp[1], input[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD temp[2].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[2].w, src0.w, src1.w, src0.0 3: src0.xyz = temp[2], src0.w = temp[2] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TXP temp[1], input[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD temp[2].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[2].w, src0.w, src1.w, src0.0 3: src0.xyz = temp[2], src0.w = temp[2] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TXP temp[1], input[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src1.w, src0.0 3: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=3************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 1, tex_end: 0 (code_addr: 00400040) TEX: TXP t1, t1, texture[0] (00018041) 0: xyz: t1 t0 t0 bias-> t0.xyz (03800001) w: t1 t0 t0 bias-> t0.w (00800001) xyz: t1.xyz t0.xyz 0.0 op: 00050200 w: t1.w t0.w 0.0 op: 00040509 1: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: END Vertex Program: before compilation # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[1], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[1]; 6: MOV output[2], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[0], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[0], input[0].xxxx, const[0]; 1: MAD temp[0], input[0].yyyy, const[1], temp[0]; 2: MAD temp[0], input[0].zzzz, const[2], temp[0]; 3: MAD temp[0], input[0].wwww, const[3], temp[0]; 4: MOV output[1], input[1]; 5: MOV output[0], temp[0]; 6: MOV output[2], temp[0]; Final vertex program code: 0: op: 0x00f00002 dst: 0t op: VE_MULTIPLY src0: 0x00000001 reg: 0i swiz: X/ X/ X/ X src1: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 1: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00492001 reg: 0i swiz: Y/ Y/ Y/ Y src1: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 2: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00924001 reg: 0i swiz: Z/ Z/ Z/ Z src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 3: op: 0x00f00004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x00db6001 reg: 0i swiz: W/ W/ W/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W 4: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 5: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 6: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0] 0: TXP TEMP[0], IN[1].xyyw, SAMP[0], 2D 1: MUL TEMP[0].xyz, TEMP[0], IN[0] 2: MOV TEMP[0].xyz, TEMP[0].xyzx 3: MOV TEMP[0].w, IN[0].wwww 4: MOV OUT[0], TEMP[0] 5: END Fragment Program: before compilation # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0].xyz, temp[0], input[0]; 2: MOV temp[0].xyz, temp[0].xyzx; 3: MOV temp[0].w, input[0].wwww; 4: MOV output[0], temp[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0].xyz, temp[0], input[0]; 2: MOV temp[0].xyz, temp[0].xyzx; 3: MOV temp[0].w, input[0].wwww; 4: MOV output[0], temp[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0].xyz, temp[0], input[0]; 2: MOV temp[0].xyz, temp[0].xyzx; 3: MOV temp[0].w, input[0].wwww; 4: MOV output[0], temp[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0].xyz, temp[0], input[0]; 2: MOV temp[0].xyz, temp[0].xyzx; 3: MOV temp[0].w, input[0].wwww; 4: MOV output[0], temp[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0].xyz, temp[0], input[0]; 2: MOV temp[0].xyz, temp[0].xyzx; 3: MOV temp[0].w, input[0].wwww; 4: MOV output[0], temp[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0].xyz, temp[0], input[0]; 2: MOV temp[0].xyz, temp[0].xyzx; 3: MOV temp[0].w, input[0].wwww; 4: MOV output[0], temp[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TXP temp[0], input[1].xyyw, 2D[0]; 1: MUL temp[0].xyz, temp[0], input[0]; 2: MOV temp[0].xyz, temp[0].xyzx; 3: MOV temp[0].w, input[0].wwww; 4: MOV output[0], temp[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TXP temp[0].xyz, input[1].xy_w, 2D[0]; 1: MUL temp[0].xyz, temp[0].xyz_, input[0].xyz_; 2: MOV temp[0].xyz, temp[0].xyz_; 3: MOV temp[0].w, input[0].___w; 4: MOV output[0], temp[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TXP temp[0].xyz, input[1].xy_w, 2D[0]; 1: MUL temp[0].xyz, temp[0].xyz_, input[0].xyz_; 2: MOV temp[0].xyz, temp[0].xyz_; 3: MOV temp[0].w, input[0].___w; 4: MOV output[0], temp[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TXP temp[1].xyz, input[1].xy_w, 2D[0]; 1: MUL temp[2].xyz, temp[1].xyz_, input[0].xyz_; 2: MOV temp[0].xyz, temp[2].xyz_; 3: MOV temp[0].w, input[0].___w; 4: MOV output[0], temp[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TXP temp[1].xyz, input[1].xy_w, 2D[0]; 1: MUL temp[2].xyz, temp[1].xyz_, input[0].xyz_; 2: MOV temp[0].xyz, temp[2].xyz_; 3: MOV temp[0].w, input[0].___w; 4: MOV output[0], temp[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TXP temp[1].xyz, input[1].xy_w, 2D[0]; 1: MUL temp[2].xyz, temp[1].xyz_, input[0].xyz_; 2: MOV temp[0].xyz, temp[2].xyz_; 3: MOV temp[0].w, input[0].___w; 4: MOV output[0], temp[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TXP temp[1].xyz, input[1].xy_w, 2D[0]; 1: MUL temp[2].xyz, temp[1].xyz_, input[0].xyz_; 2: MOV temp[0].xyz, temp[2].xyz_; 3: MOV temp[0].w, input[0].___w; 4: MOV output[0], temp[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TXP temp[1].xyz, input[1].xy_w, 2D[0]; 1: src0.xyz = temp[1], src1.xyz = input[0] MAD temp[2].xyz, src0.xyz, src1.xyz, src0.000 2: src0.xyz = temp[2] MAD temp[0].xyz, src0.xyz, src0.111, src0.000 3: src0.w = input[0] MAD temp[0].w, src0.w, src0.1, src0.0 4: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TXP temp[1].xyz, input[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = input[0], src1.xyz = input[0] SEM_WAIT MAD temp[2].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src0.1, src0.0 3: src0.xyz = temp[2] MAD temp[0].xyz, src0.xyz, src0.111, src0.000 4: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TXP temp[1].xyz, input[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = input[0], src1.xyz = input[0] SEM_WAIT MAD temp[2].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src0.1, src0.0 3: src0.xyz = temp[2] MAD temp[0].xyz, src0.xyz, src0.111, src0.000 4: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TXP temp[1].xyz, input[1].xy_w, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = input[0], src1.xyz = input[0] SEM_WAIT MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[0].w, src0.w, src0.1, src0.0 3: src0.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src0.111, src0.000 4: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=4************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 2, tex_end: 0 (code_addr: 00400080) TEX: TXP t1, t1, texture[0] (00018041) 0: xyz: t1 t0 t0 bias-> t0.xyz (03800001) w: t0 t0 t0 bias-> t0.w (00800000) xyz: t1.xyz t0.xyz 0.0 op: 00050200 w: t0.w 1.0 0.0 op: 00040889 1: xyz: t0 t0 t0 bias-> t0.xyz (03800000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.x t0.x t0.x op: 00000000 2: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL CONST[0..3] 0: MOV OUT[1], IN[1] 1: MOV OUT[2].xy, IN[2] 2: DP4 OUT[0].w, IN[0], CONST[0] 3: DP4 OUT[0].z, IN[0], CONST[1] 4: DP4 OUT[0].y, IN[0], CONST[2] 5: DP4 OUT[0].x, IN[0], CONST[3] 6: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2]; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2]; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2]; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2]; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2]; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2]; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[2].xy, input[2].xy__; 2: DP4 temp[0].w, input[0], const[0]; 3: DP4 temp[0].z, input[0], const[1]; 4: DP4 temp[0].y, input[0], const[2]; 5: DP4 temp[0].x, input[0], const[3]; 6: MOV output[0], temp[0]; 7: MOV output[3], temp[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00304203 dst: 2o op: VE_ADD src0: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 2: op: 0x00800001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 3: op: 0x00400001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 4: op: 0x00200001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 5: op: 0x00100001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 6: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 7: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0] 0: TEX TEMP[0], IN[0], SAMP[0], 2D 1: MUL OUT[0].xyz, TEMP[0], CONST[0] 2: MOV OUT[0].w, TEMP[0] 3: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[0], 2D[0]; 1: MUL output[0].xyz, temp[0], const[0]; 2: MOV output[0].w, temp[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[0], 2D[0]; 1: MUL output[0].xyz, temp[0], const[0]; 2: MOV output[0].w, temp[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[0], 2D[0]; 1: MUL output[0].xyz, temp[0], const[0]; 2: MOV output[0].w, temp[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[0], 2D[0]; 1: MUL output[0].xyz, temp[0], const[0]; 2: MOV output[0].w, temp[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[0], 2D[0]; 1: MUL output[0].xyz, temp[0], const[0]; 2: MOV output[0].w, temp[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[0], 2D[0]; 1: MUL output[0].xyz, temp[0], const[0]; 2: MOV output[0].w, temp[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[0], 2D[0]; 1: MUL output[0].xyz, temp[0], const[0]; 2: MOV output[0].w, temp[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MUL output[0].xyz, temp[0].xyz_, const[0].xyz_; 2: MOV output[0].w, temp[0].___w; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MUL output[0].xyz, temp[0].xyz_, const[0].xyz_; 2: MOV output[0].w, temp[0].___w; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MUL output[0].xyz, temp[1].xyz_, const[0].xyz_; 2: MOV output[0].w, temp[1].___w; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MUL output[0].xyz, temp[1].xyz_, const[0].xyz_; 2: MOV output[0].w, temp[1].___w; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MUL output[0].xyz, temp[1].xyz_, const[0].xyz_; 2: MOV output[0].w, temp[1].___w; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MUL output[0].xyz, temp[1].xyz_, const[0].xyz_; 2: MOV output[0].w, temp[1].___w; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: src0.xyz = temp[1], src1.xyz = const[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 2: src0.w = temp[1] MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = const[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = const[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = const[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=5************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t0, t0, texture[0] (00008000) 0: xyz: t0 c0 t0 bias-> o0.xyz (1c000800) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz c0.xyz 0.0 op: 00050200 w: t0.w 1.0 0.0 op: 00040889 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[0], src0.w = input[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=6************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[1..5] DCL TEMP[0] IMM FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, -IN[0].wwww, IMM[0].xyyx 1: MOV TEMP[0].w, IN[0] 2: MAD TEMP[0].xyz, -TEMP[0].xxxx, CONST[1], IN[0] 3: DP4 OUT[0].w, TEMP[0], CONST[2] 4: DP4 OUT[0].z, TEMP[0], CONST[3] 5: DP4 OUT[0].y, TEMP[0], CONST[4] 6: DP4 OUT[0].x, TEMP[0], CONST[5] 7: END Vertex Program: before compilation # Radeon Compiler Program 0: ADD temp[0].x, -input[0].wwww, temp[0].1001; 1: MOV temp[0].w, input[0]; 2: MAD temp[0].xyz, -temp[0].xxxx, const[1], input[0]; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].wwww, temp[0].1001; 1: MOV temp[0].w, input[0]; 2: MAD temp[0].xyz, -temp[0].xxxx, const[1], input[0]; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].wwww, temp[0].1001; 1: MOV temp[0].w, input[0]; 2: MAD temp[0].xyz, -temp[0].xxxx, const[1], input[0]; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].wwww, temp[0].1001; 1: MOV temp[0].w, input[0]; 2: MAD temp[0].xyz, -temp[0].xxxx, const[1], input[0]; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].wwww, temp[0].1001; 1: MOV temp[0].w, input[0]; 2: MAD temp[0].xyz, -temp[0].xxxx, const[1], input[0]; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].wwww, temp[0].1001; 1: MOV temp[0].w, input[0]; 2: MAD temp[0].xyz, -temp[0].xxxx, const[1], input[0]; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].w___, temp[0].1___; 1: MOV temp[0].w, input[0].___w; 2: MAD temp[0].xyz, -temp[0].xxx_, const[1].xyz_, input[0].xyz_; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].w___, none.1___; 1: MOV temp[0].w, input[0].___w; 2: MAD temp[0].xyz, -temp[0].xxx_, const[1].xyz_, input[0].xyz_; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].w___, none.1___; 1: MOV temp[0].w, input[0].___w; 2: MAD temp[0].xyz, -temp[0].xxx_, const[1].xyz_, input[0].xyz_; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].w___, none.1___; 1: MOV temp[0].w, input[0].___w; 2: MAD temp[0].xyz, -temp[0].xxx_, const[1].xyz_, input[0].xyz_; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: ADD temp[0].x, -input[0].w___, none.1___; 1: MOV temp[0].w, input[0].___w; 2: MAD temp[0].xyz, -temp[0].xxx_, const[1].xyz_, input[0].xyz_; 3: DP4 temp[1].w, temp[0], const[2]; 4: DP4 temp[1].z, temp[0], const[3]; 5: DP4 temp[1].y, temp[0], const[4]; 6: DP4 temp[1].x, temp[0], const[5]; 7: MOV output[0], temp[1]; 8: MOV output[1], temp[1]; Final vertex program code: 0: op: 0x00100003 dst: 0t op: VE_ADD src0: 0x1fff6001 reg: 0i swiz: -W/-U/-U/-U src1: 0x01ffa000 reg: 0t swiz: 1/ U/ U/ U src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 1: op: 0x00800003 dst: 0t op: VE_ADD src0: 0x00ffe001 reg: 0i swiz: U/ U/ U/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00700004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x1fc00000 reg: 0t swiz: -X/-X/-X/-U src1: 0x01d10022 reg: 1c swiz: X/ Y/ Z/ U src2: 0x01d10001 reg: 0i swiz: X/ Y/ Z/ U 3: op: 0x00802001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 4: op: 0x00402001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 5: op: 0x00202001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src2: 0x01248082 reg: 4c swiz: 0/ 0/ 0/ 0 6: op: 0x00102001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src2: 0x012480a2 reg: 5c swiz: 0/ 0/ 0/ 0 7: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 8: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL OUT[0], COLOR IMM FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], temp[0].1111; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], none.1111; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], none.1111; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], none.1111; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: MAD color[0].xyz, src0.111, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: MAD color[0].xyz, src0.111, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: MAD color[0].xyz, src0.111, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: MAD color[0].xyz, src0.111, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 pc=7************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: 1.0 1.0 0.0 op: 00050a95 w: 1.0 1.0 0.0 op: 00040891 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..6] DCL TEMP[0..1] 0: MOV TEMP[0].xyz, IN[3] 1: MUL TEMP[1].xyz, IN[1].zxyw, TEMP[0].yzxw 2: MAD TEMP[1].xyz, IN[1].yzxw, TEMP[0].zxyw, -TEMP[1] 3: ADD TEMP[0].xyz, -IN[0], CONST[0] 4: MUL TEMP[0].xyz, TEMP[0], CONST[1] 5: MUL TEMP[1].xyz, TEMP[1], IN[3].wwww 6: DP3 OUT[3].y, TEMP[0], TEMP[1] 7: MOV OUT[1], CONST[2] 8: MOV OUT[2].xyz, IN[2] 9: DP3 OUT[3].z, IN[1], TEMP[0] 10: DP3 OUT[3].x, TEMP[0], IN[3] 11: DP4 OUT[0].w, IN[0], CONST[3] 12: DP4 OUT[0].z, IN[0], CONST[4] 13: DP4 OUT[0].y, IN[0], CONST[5] 14: DP4 OUT[0].x, IN[0], CONST[6] 15: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[1].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: ADD temp[0].xyz, -input[0], const[0]; 4: MUL temp[0].xyz, temp[0], const[1]; 5: MUL temp[1].xyz, temp[1], input[3].wwww; 6: DP3 output[3].y, temp[0], temp[1]; 7: MOV output[1], const[2]; 8: MOV output[2].xyz, input[2]; 9: DP3 output[3].z, input[1], temp[0]; 10: DP3 output[3].x, temp[0], input[3]; 11: DP4 temp[2].w, input[0], const[3]; 12: DP4 temp[2].z, input[0], const[4]; 13: DP4 temp[2].y, input[0], const[5]; 14: DP4 temp[2].x, input[0], const[6]; 15: MOV output[0], temp[2]; 16: MOV output[4], temp[2]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[1].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: ADD temp[0].xyz, -input[0], const[0]; 4: MUL temp[0].xyz, temp[0], const[1]; 5: MUL temp[1].xyz, temp[1], input[3].wwww; 6: DP3 output[3].y, temp[0], temp[1]; 7: MOV output[1], const[2]; 8: MOV output[2].xyz, input[2]; 9: DP3 output[3].z, input[1], temp[0]; 10: DP3 output[3].x, temp[0], input[3]; 11: DP4 temp[2].w, input[0], const[3]; 12: DP4 temp[2].z, input[0], const[4]; 13: DP4 temp[2].y, input[0], const[5]; 14: DP4 temp[2].x, input[0], const[6]; 15: MOV output[0], temp[2]; 16: MOV output[4], temp[2]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[1].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: ADD temp[0].xyz, -input[0], const[0]; 4: MUL temp[0].xyz, temp[0], const[1]; 5: MUL temp[1].xyz, temp[1], input[3].wwww; 6: DP3 output[3].y, temp[0], temp[1]; 7: MOV output[1], const[2]; 8: MOV output[2].xyz, input[2]; 9: DP3 output[3].z, input[1], temp[0]; 10: DP3 output[3].x, temp[0], input[3]; 11: DP4 temp[2].w, input[0], const[3]; 12: DP4 temp[2].z, input[0], const[4]; 13: DP4 temp[2].y, input[0], const[5]; 14: DP4 temp[2].x, input[0], const[6]; 15: MOV output[0], temp[2]; 16: MOV output[4], temp[2]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[1].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: ADD temp[0].xyz, -input[0], const[0]; 4: MUL temp[0].xyz, temp[0], const[1]; 5: MUL temp[1].xyz, temp[1], input[3].wwww; 6: DP3 output[3].y, temp[0], temp[1]; 7: MOV output[1], const[2]; 8: MOV output[2].xyz, input[2]; 9: DP3 output[3].z, input[1], temp[0]; 10: DP3 output[3].x, temp[0], input[3]; 11: DP4 temp[2].w, input[0], const[3]; 12: DP4 temp[2].z, input[0], const[4]; 13: DP4 temp[2].y, input[0], const[5]; 14: DP4 temp[2].x, input[0], const[6]; 15: MOV output[0], temp[2]; 16: MOV output[4], temp[2]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[1].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: ADD temp[0].xyz, -input[0], const[0]; 4: MUL temp[0].xyz, temp[0], const[1]; 5: MUL temp[1].xyz, temp[1], input[3].wwww; 6: DP4 output[3].y, temp[0].xyz0, temp[1].xyz0; 7: MOV output[1], const[2]; 8: MOV output[2].xyz, input[2]; 9: DP4 output[3].z, input[1].xyz0, temp[0].xyz0; 10: DP4 output[3].x, temp[0].xyz0, input[3].xyz0; 11: DP4 temp[2].w, input[0], const[3]; 12: DP4 temp[2].z, input[0], const[4]; 13: DP4 temp[2].y, input[0], const[5]; 14: DP4 temp[2].x, input[0], const[6]; 15: MOV output[0], temp[2]; 16: MOV output[4], temp[2]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[1].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: ADD temp[0].xyz, -input[0], const[0]; 4: MUL temp[0].xyz, temp[0], const[1]; 5: MUL temp[1].xyz, temp[1], input[3].wwww; 6: DP4 output[3].y, temp[0].xyz0, temp[1].xyz0; 7: MOV output[1], const[2]; 8: MOV output[2].xyz, input[2]; 9: DP4 output[3].z, input[1].xyz0, temp[0].xyz0; 10: DP4 output[3].x, temp[0].xyz0, input[3].xyz0; 11: DP4 temp[2].w, input[0], const[3]; 12: DP4 temp[2].z, input[0], const[4]; 13: DP4 temp[2].y, input[0], const[5]; 14: DP4 temp[2].x, input[0], const[6]; 15: MOV output[0], temp[2]; 16: MOV output[4], temp[2]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3].xyz_; 1: MUL temp[1].xyz, input[1].zxy_, temp[0].yzx_; 2: MAD temp[1].xyz, input[1].yzx_, temp[0].zxy_, -temp[1].xyz_; 3: ADD temp[0].xyz, -input[0].xyz_, const[0].xyz_; 4: MUL temp[0].xyz, temp[0].xyz_, const[1].xyz_; 5: MUL temp[1].xyz, temp[1].xyz_, input[3].www_; 6: DP4 output[3].y, temp[0].xyz0, temp[1].xyz0; 7: MOV output[1], const[2]; 8: MOV output[2].xyz, input[2].xyz_; 9: DP4 output[3].z, input[1].xyz0, temp[0].xyz0; 10: DP4 output[3].x, temp[0].xyz0, input[3].xyz0; 11: DP4 temp[2].w, input[0], const[3]; 12: DP4 temp[2].z, input[0], const[4]; 13: DP4 temp[2].y, input[0], const[5]; 14: DP4 temp[2].x, input[0], const[6]; 15: MOV output[0], temp[2]; 16: MOV output[4], temp[2]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[1].xyz, input[1].zxy_, input[3].yzx_; 1: MAD temp[1].xyz, input[1].yzx_, input[3].zxy_, -temp[1].xyz_; 2: ADD temp[0].xyz, -input[0].xyz_, const[0].xyz_; 3: MUL temp[0].xyz, temp[0].xyz_, const[1].xyz_; 4: MUL temp[1].xyz, temp[1].xyz_, input[3].www_; 5: DP4 output[3].y, temp[0].xyz0, temp[1].xyz0; 6: MOV output[1], const[2]; 7: MOV output[2].xyz, input[2].xyz_; 8: DP4 output[3].z, input[1].xyz0, temp[0].xyz0; 9: DP4 output[3].x, temp[0].xyz0, input[3].xyz0; 10: DP4 temp[2].w, input[0], const[3]; 11: DP4 temp[2].z, input[0], const[4]; 12: DP4 temp[2].y, input[0], const[5]; 13: DP4 temp[2].x, input[0], const[6]; 14: MOV output[0], temp[2]; 15: MOV output[4], temp[2]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV temp[3], input[3].yzx_; 1: MUL temp[1].xyz, input[1].zxy_, temp[3]; 2: MOV temp[4], input[3].zxy_; 3: MAD temp[1].xyz, input[1].yzx_, temp[4], -temp[1].xyz_; 4: ADD temp[0].xyz, -input[0].xyz_, const[0].xyz_; 5: MUL temp[0].xyz, temp[0].xyz_, const[1].xyz_; 6: MUL temp[1].xyz, temp[1].xyz_, input[3].www_; 7: DP4 output[3].y, temp[0].xyz0, temp[1].xyz0; 8: MOV output[1], const[2]; 9: MOV output[2].xyz, input[2].xyz_; 10: DP4 output[3].z, input[1].xyz0, temp[0].xyz0; 11: DP4 output[3].x, temp[0].xyz0, input[3].xyz0; 12: DP4 temp[2].w, input[0], const[3]; 13: DP4 temp[2].z, input[0], const[4]; 14: DP4 temp[2].y, input[0], const[5]; 15: DP4 temp[2].x, input[0], const[6]; 16: MOV output[0], temp[2]; 17: MOV output[4], temp[2]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV temp[0], input[3].yzx_; 1: MUL temp[0].xyz, input[1].zxy_, temp[0]; 2: MOV temp[1], input[3].zxy_; 3: MAD temp[0].xyz, input[1].yzx_, temp[1], -temp[0].xyz_; 4: ADD temp[1].xyz, -input[0].xyz_, const[0].xyz_; 5: MUL temp[1].xyz, temp[1].xyz_, const[1].xyz_; 6: MUL temp[0].xyz, temp[0].xyz_, input[3].www_; 7: DP4 output[3].y, temp[1].xyz0, temp[0].xyz0; 8: MOV output[1], const[2]; 9: MOV output[2].xyz, input[2].xyz_; 10: DP4 output[3].z, input[1].xyz0, temp[1].xyz0; 11: DP4 output[3].x, temp[1].xyz0, input[3].xyz0; 12: DP4 temp[0].w, input[0], const[3]; 13: DP4 temp[0].z, input[0], const[4]; 14: DP4 temp[0].y, input[0], const[5]; 15: DP4 temp[0].x, input[0], const[6]; 16: MOV output[0], temp[0]; 17: MOV output[4], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV temp[0], input[3].yzx_; 1: MUL temp[0].xyz, input[1].zxy_, temp[0]; 2: MOV temp[1], input[3].zxy_; 3: MAD temp[0].xyz, input[1].yzx_, temp[1], -temp[0].xyz_; 4: ADD temp[1].xyz, -input[0].xyz_, const[0].xyz_; 5: MUL temp[1].xyz, temp[1].xyz_, const[1].xyz_; 6: MUL temp[0].xyz, temp[0].xyz_, input[3].www_; 7: DP4 output[3].y, temp[1].xyz0, temp[0].xyz0; 8: MOV output[1], const[2]; 9: MOV output[2].xyz, input[2].xyz_; 10: DP4 output[3].z, input[1].xyz0, temp[1].xyz0; 11: DP4 output[3].x, temp[1].xyz0, input[3].xyz0; 12: DP4 temp[0].w, input[0], const[3]; 13: DP4 temp[0].z, input[0], const[4]; 14: DP4 temp[0].y, input[0], const[5]; 15: DP4 temp[0].x, input[0], const[6]; 16: MOV output[0], temp[0]; 17: MOV output[4], temp[0]; Final vertex program code: 0: op: 0x00f00003 dst: 0t op: VE_ADD src0: 0x01c22061 reg: 3i swiz: Y/ Z/ X/ U src1: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 1: op: 0x00700002 dst: 0t op: VE_MULTIPLY src0: 0x01c84021 reg: 1i swiz: Z/ X/ Y/ U src1: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 2: op: 0x00f02003 dst: 1t op: VE_ADD src0: 0x01c84061 reg: 3i swiz: Z/ X/ Y/ U src1: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 3: op: 0x00700004 dst: 0t op: VE_MULTIPLY_ADD src0: 0x01c22021 reg: 1i swiz: Y/ Z/ X/ U src1: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src2: 0x1fd10000 reg: 0t swiz: -X/-Y/-Z/-U 4: op: 0x00702003 dst: 1t op: VE_ADD src0: 0x1fd10001 reg: 0i swiz: -X/-Y/-Z/-U src1: 0x01d10002 reg: 0c swiz: X/ Y/ Z/ U src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 5: op: 0x00702002 dst: 1t op: VE_MULTIPLY src0: 0x01d10020 reg: 1t swiz: X/ Y/ Z/ U src1: 0x01d10022 reg: 1c swiz: X/ Y/ Z/ U src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 6: op: 0x00700002 dst: 0t op: VE_MULTIPLY src0: 0x01d10000 reg: 0t swiz: X/ Y/ Z/ U src1: 0x01db6061 reg: 3i swiz: W/ W/ W/ U src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 7: op: 0x00206201 dst: 3o op: VE_DOT_PRODUCT src0: 0x01110020 reg: 1t swiz: X/ Y/ Z/ 0 src1: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 8: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src1: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 src2: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 9: op: 0x00704203 dst: 2o op: VE_ADD src0: 0x01d10041 reg: 2i swiz: X/ Y/ Z/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 10: op: 0x00406201 dst: 3o op: VE_DOT_PRODUCT src0: 0x01110021 reg: 1i swiz: X/ Y/ Z/ 0 src1: 0x01110020 reg: 1t swiz: X/ Y/ Z/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 11: op: 0x00106201 dst: 3o op: VE_DOT_PRODUCT src0: 0x01110020 reg: 1t swiz: X/ Y/ Z/ 0 src1: 0x01110061 reg: 3i swiz: X/ Y/ Z/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 12: op: 0x00800001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 13: op: 0x00400001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src2: 0x01248082 reg: 4c swiz: 0/ 0/ 0/ 0 14: op: 0x00200001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src2: 0x012480a2 reg: 5c swiz: 0/ 0/ 0/ 0 15: op: 0x00100001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src2: 0x012480c2 reg: 6c swiz: 0/ 0/ 0/ 0 16: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 17: op: 0x00f08203 dst: 4o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL TEMP[0..3] IMM FLT32 { 0.0000, 2.0000, 1.0000, 0.0000} 0: TEX TEMP[0].xyz, IN[1], SAMP[0], 2D 1: TEX TEMP[1].xyz, IN[2], SAMP[2], CUBE 2: TEX TEMP[2].xyz, IN[1], SAMP[1], 2D 3: DP3 TEMP[0].w, IN[2], IN[2] 4: MAD TEMP[2].xyz, TEMP[2], IMM[0].yyyy, -IMM[0].zzzz 5: MAD TEMP[1].xyz, TEMP[1], IMM[0].yyyy, -IMM[0].zzzz 6: MUL TEMP[0].xyz, TEMP[0], IN[0] 7: MOV OUT[0].w, IMM[0].xxxx 8: TEX TEMP[3].x, TEMP[0].wwww, SAMP[3], 1D 9: DP3 TEMP[0].w, TEMP[1], TEMP[2] 10: MUL TEMP[0].xyz, TEMP[0], TEMP[3].xxxx 11: MUL OUT[0].xyz, TEMP[0], TEMP[0].wwww 12: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[2], CUBE[2]; 2: TEX temp[2].xyz, input[1], 2D[1]; 3: DP3 temp[0].w, input[2], input[2]; 4: MAD temp[2].xyz, temp[2], const[0].yyyy, -const[0].zzzz; 5: MAD temp[1].xyz, temp[1], const[0].yyyy, -const[0].zzzz; 6: MUL temp[0].xyz, temp[0], input[0]; 7: MOV output[0].w, const[0].xxxx; 8: TEX temp[3].x, temp[0].wwww, 1D[3]; 9: DP3 temp[0].w, temp[1], temp[2]; 10: MUL temp[0].xyz, temp[0], temp[3].xxxx; 11: MUL output[0].xyz, temp[0], temp[0].wwww; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[2], CUBE[2]; 2: TEX temp[2].xyz, input[1], 2D[1]; 3: DP3 temp[0].w, input[2], input[2]; 4: MAD temp[2].xyz, temp[2], const[0].yyyy, -const[0].zzzz; 5: MAD temp[1].xyz, temp[1], const[0].yyyy, -const[0].zzzz; 6: MUL temp[0].xyz, temp[0], input[0]; 7: MOV output[0].w, const[0].xxxx; 8: TEX temp[3].x, temp[0].wwww, 1D[3]; 9: DP3 temp[0].w, temp[1], temp[2]; 10: MUL temp[0].xyz, temp[0], temp[3].xxxx; 11: MUL output[0].xyz, temp[0], temp[0].wwww; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[2], CUBE[2]; 2: TEX temp[2].xyz, input[1], 2D[1]; 3: DP3 temp[0].w, input[2], input[2]; 4: MAD temp[2].xyz, temp[2], const[0].yyyy, -const[0].zzzz; 5: MAD temp[1].xyz, temp[1], const[0].yyyy, -const[0].zzzz; 6: MUL temp[0].xyz, temp[0], input[0]; 7: MOV output[0].w, const[0].xxxx; 8: TEX temp[3].x, temp[0].wwww, 1D[3]; 9: DP3 temp[0].w, temp[1], temp[2]; 10: MUL temp[0].xyz, temp[0], temp[3].xxxx; 11: MUL output[0].xyz, temp[0], temp[0].wwww; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[2], CUBE[2]; 2: TEX temp[2].xyz, input[1], 2D[1]; 3: DP3 temp[0].w, input[2], input[2]; 4: MAD temp[2].xyz, temp[2], const[0].yyyy, -const[0].zzzz; 5: MAD temp[1].xyz, temp[1], const[0].yyyy, -const[0].zzzz; 6: MUL temp[0].xyz, temp[0], input[0]; 7: MOV output[0].w, const[0].xxxx; 8: TEX temp[3].x, temp[0].wwww, 1D[3]; 9: DP3 temp[0].w, temp[1], temp[2]; 10: MUL temp[0].xyz, temp[0], temp[3].xxxx; 11: MUL output[0].xyz, temp[0], temp[0].wwww; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[2], CUBE[2]; 2: TEX temp[2].xyz, input[1], 2D[1]; 3: DP3 temp[0].w, input[2], input[2]; 4: MAD temp[2].xyz, temp[2], const[0].yyyy, -const[0].zzzz; 5: MAD temp[1].xyz, temp[1], const[0].yyyy, -const[0].zzzz; 6: MUL temp[0].xyz, temp[0], input[0]; 7: MOV output[0].w, const[0].xxxx; 8: TEX temp[3].x, temp[0].wwww, 1D[3]; 9: DP3 temp[0].w, temp[1], temp[2]; 10: MUL temp[0].xyz, temp[0], temp[3].xxxx; 11: MUL output[0].xyz, temp[0], temp[0].wwww; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[4], input[1], 2D[0]; 1: MOV temp[0].xyz, temp[4]; 2: TEX temp[5], input[2], CUBE[2]; 3: MOV temp[1].xyz, temp[5]; 4: TEX temp[6], input[1], 2D[1]; 5: MOV temp[2].xyz, temp[6]; 6: DP3 temp[0].w, input[2], input[2]; 7: MAD temp[2].xyz, temp[2], const[0].yyyy, -const[0].zzzz; 8: MAD temp[1].xyz, temp[1], const[0].yyyy, -const[0].zzzz; 9: MUL temp[0].xyz, temp[0], input[0]; 10: MOV output[0].w, const[0].xxxx; 11: TEX temp[7], temp[0].wwww, 1D[3]; 12: MOV temp[3].x, temp[7]; 13: DP3 temp[0].w, temp[1], temp[2]; 14: MUL temp[0].xyz, temp[0], temp[3].xxxx; 15: MUL output[0].xyz, temp[0], temp[0].wwww; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[4], input[1], 2D[0]; 1: MOV temp[0].xyz, temp[4]; 2: TEX temp[5], input[2], CUBE[2]; 3: MOV temp[1].xyz, temp[5]; 4: TEX temp[6], input[1], 2D[1]; 5: MOV temp[2].xyz, temp[6]; 6: DP3 temp[0].w, input[2], input[2]; 7: MAD temp[2].xyz, temp[2], const[0].yyyy, -const[0].zzzz; 8: MAD temp[1].xyz, temp[1], const[0].yyyy, -const[0].zzzz; 9: MUL temp[0].xyz, temp[0], input[0]; 10: MOV output[0].w, const[0].xxxx; 11: TEX temp[7], temp[0].wwww, 1D[3]; 12: MOV temp[3].x, temp[7]; 13: DP3 temp[0].w, temp[1], temp[2]; 14: MUL temp[0].xyz, temp[0], temp[3].xxxx; 15: MUL output[0].xyz, temp[0], temp[0].wwww; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[4].xyz, input[1].xy__, 2D[0]; 1: MOV temp[0].xyz, temp[4].xyz_; 2: TEX temp[5].xyz, input[2].xyz_, CUBE[2]; 3: MOV temp[1].xyz, temp[5].xyz_; 4: TEX temp[6].xyz, input[1].xy__, 2D[1]; 5: MOV temp[2].xyz, temp[6].xyz_; 6: DP3 temp[0].w, input[2].xyz_, input[2].xyz_; 7: MAD temp[2].xyz, temp[2].xyz_, const[0].yyy_, -const[0].zzz_; 8: MAD temp[1].xyz, temp[1].xyz_, const[0].yyy_, -const[0].zzz_; 9: MUL temp[0].xyz, temp[0].xyz_, input[0].xyz_; 10: MOV output[0].w, const[0].___x; 11: TEX temp[7].x, temp[0].w___, 1D[3]; 12: MOV temp[3].x, temp[7].x___; 13: DP3 temp[0].w, temp[1].xyz_, temp[2].xyz_; 14: MUL temp[0].xyz, temp[0].xyz_, temp[3].xxx_; 15: MUL output[0].xyz, temp[0].xyz_, temp[0].www_; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[4].xyz, input[1].xy__, 2D[0]; 1: MOV temp[0].xyz, temp[4].xyz_; 2: TEX temp[5].xyz, input[2].xyz_, CUBE[2]; 3: MOV temp[1].xyz, temp[5].xyz_; 4: TEX temp[6].xyz, input[1].xy__, 2D[1]; 5: MOV temp[2].xyz, temp[6].xyz_; 6: DP3 temp[0].w, input[2].xyz_, input[2].xyz_; 7: MAD temp[2].xyz, temp[2].xyz_, const[0].yyy_, -const[0].zzz_; 8: MAD temp[1].xyz, temp[1].xyz_, const[0].yyy_, -const[0].zzz_; 9: MUL temp[0].xyz, temp[0].xyz_, input[0].xyz_; 10: MOV output[0].w, const[0].___x; 11: TEX temp[7].x, temp[0].w___, 1D[3]; 12: MOV temp[3].x, temp[7].x___; 13: DP3 temp[0].w, temp[1].xyz_, temp[2].xyz_; 14: MUL temp[0].xyz, temp[0].xyz_, temp[3].xxx_; 15: MUL output[0].xyz, temp[0].xyz_, temp[0].www_; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: MOV temp[9].xyz, temp[8].xyz_; 2: TEX temp[10].xyz, input[2].xyz_, CUBE[2]; 3: MOV temp[11].xyz, temp[10].xyz_; 4: TEX temp[12].xyz, input[1].xy__, 2D[1]; 5: MOV temp[13].xyz, temp[12].xyz_; 6: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; 7: MAD temp[15].xyz, temp[13].xyz_, const[0].yyy_, -const[0].zzz_; 8: MAD temp[16].xyz, temp[11].xyz_, const[0].yyy_, -const[0].zzz_; 9: MUL temp[17].xyz, temp[9].xyz_, input[0].xyz_; 10: MOV output[0].w, const[0].___x; 11: TEX temp[18].x, temp[14].w___, 1D[3]; 12: MOV temp[19].x, temp[18].x___; 13: DP3 temp[20].w, temp[16].xyz_, temp[15].xyz_; 14: MUL temp[21].xyz, temp[17].xyz_, temp[19].xxx_; 15: MUL output[0].xyz, temp[21].xyz_, temp[20].www_; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: TEX temp[10].xyz, input[2].xyz_, CUBE[2]; 2: TEX temp[12].xyz, input[1].xy__, 2D[1]; 3: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; 4: MAD temp[15].xyz, temp[12].xyz_, const[0].yyy_, -none.111_; 5: MAD temp[16].xyz, temp[10].xyz_, const[0].yyy_, -none.111_; 6: MUL temp[17].xyz, temp[8].xyz_, input[0].xyz_; 7: MOV output[0].w, none.___0; 8: TEX temp[18].x, temp[14].w___, 1D[3]; 9: DP3 temp[20].w, temp[16].xyz_, temp[15].xyz_; 10: MUL temp[21].xyz, temp[17].xyz_, temp[18].xxx_; 11: MUL output[0].xyz, temp[21].xyz_, temp[20].www_; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: TEX temp[10].xyz, input[2].xyz_, CUBE[2]; 2: TEX temp[12].xyz, input[1].xy__, 2D[1]; 3: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; 4: MAD temp[15].xyz, temp[12].xyz_, const[0].yyy_, -none.111_; 5: MAD temp[16].xyz, temp[10].xyz_, const[0].yyy_, -none.111_; 6: MUL temp[17].xyz, temp[8].xyz_, input[0].xyz_; 7: MOV output[0].w, none.___0; 8: MOV temp[0].x, temp[14].w___; 9: TEX temp[18].x, temp[0].x___, 1D[3]; 10: DP3 temp[20].w, temp[16].xyz_, temp[15].xyz_; 11: MUL temp[21].xyz, temp[17].xyz_, temp[18].xxx_; 12: MUL output[0].xyz, temp[21].xyz_, temp[20].www_; CONST[0] = { 0.0000 2.0000 1.0000 0.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: TEX temp[10].xyz, input[2].xyz_, CUBE[2]; 2: TEX temp[12].xyz, input[1].xy__, 2D[1]; 3: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; 4: MAD temp[15].xyz, temp[12].xyz_, const[0].yyy_, -none.111_; 5: MAD temp[16].xyz, temp[10].xyz_, const[0].yyy_, -none.111_; 6: MUL temp[17].xyz, temp[8].xyz_, input[0].xyz_; 7: MOV output[0].w, none.___0; 8: MOV temp[0].x, temp[14].w___; 9: TEX temp[18].x, temp[0].x___, 1D[3]; 10: DP3 temp[20].w, temp[16].xyz_, temp[15].xyz_; 11: MUL temp[21].xyz, temp[17].xyz_, temp[18].xxx_; 12: MUL output[0].xyz, temp[21].xyz_, temp[20].www_; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: TEX temp[10].xyz, input[2].xyz_, CUBE[2]; 2: TEX temp[12].xyz, input[1].xy__, 2D[1]; 3: src0.xyz = input[2] DP3, src0.xyz, src0.xyz DP3 temp[14].w, src0._, src0._ 4: src0.xyz = temp[12], src1.xyz = const[0] MAD temp[15].xyz, src0.xyz, src1.yyy, -src0.111 5: src0.xyz = temp[10], src1.xyz = const[0] MAD temp[16].xyz, src0.xyz, src1.yyy, -src0.111 6: src0.xyz = temp[8], src1.xyz = input[0] MAD temp[17].xyz, src0.xyz, src1.xyz, src0.000 7: MAD color[0].w, src0.0, src0.1, src0.0 8: src0.w = temp[14] MAD temp[0].x, src0.w__, src0.111, src0.000 9: TEX temp[18].x, temp[0].x___, 1D[3]; 10: src0.xyz = temp[16], src1.xyz = temp[15] DP3, src0.xyz, src1.xyz DP3 temp[20].w, src0._, src0._ 11: src0.xyz = temp[17], src1.xyz = temp[18] MAD temp[21].xyz, src0.xyz, src1.xxx, src0.000 12: src0.xyz = temp[21], src0.w = temp[20] MAD color[0].xyz, src0.xyz, src0.www, src0.000 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[2] DP3, src0.xyz, src0.xyz DP3 temp[14].w, src0._, src0._ 1: src0.w = temp[14] MAD temp[0].x, src0.w__, src0.111, src0.000 2: BEGIN_TEX; 3: TEX temp[8].xyz, input[1].xy__, 2D[0]; 4: TEX temp[10].xyz, input[2].xyz_, CUBE[2]; 5: TEX temp[12].xyz, input[1].xy__, 2D[1]; 6: TEX temp[18].x, temp[0].x___, 1D[3] SEM_WAIT SEM_ACQUIRE; 7: src0.xyz = temp[8], src1.xyz = input[0] SEM_WAIT MAD temp[17].xyz, src0.xyz, src1.xyz, src0.000 8: src0.xyz = temp[10], src1.xyz = const[0] MAD temp[16].xyz, src0.xyz, src1.yyy, -src0.111 9: src0.xyz = temp[12], src1.xyz = const[0] MAD temp[15].xyz, src0.xyz, src1.yyy, -src0.111 10: src0.xyz = temp[17], src1.xyz = temp[18] MAD temp[21].xyz, src0.xyz, src1.xxx, src0.000 11: src0.xyz = temp[16], src1.xyz = temp[15] DP3, src0.xyz, src1.xyz DP3 temp[20].w, src0._, src0._ 12: src0.xyz = temp[21], src0.w = temp[20] MAD color[0].xyz, src0.xyz, src0.www, src0.000 MAD color[0].w, src0.0, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[2] DP3, src0.xyz, src0.xyz DP3 temp[14].w, src0._, src0._ 1: src0.w = temp[14] MAD temp[0].x, src0.w__, src0.111, src0.000 2: BEGIN_TEX; 3: TEX temp[8].xyz, input[1].xy__, 2D[0]; 4: TEX temp[10].xyz, input[2].xyz_, CUBE[2]; 5: TEX temp[12].xyz, input[1].xy__, 2D[1]; 6: TEX temp[18].x, temp[0].x___, 1D[3] SEM_WAIT SEM_ACQUIRE; 7: src0.xyz = temp[8], src1.xyz = input[0] SEM_WAIT MAD temp[17].xyz, src0.xyz, src1.xyz, src0.000 8: src0.xyz = temp[10], src1.xyz = const[0] MAD temp[16].xyz, src0.xyz, src1.yyy, -src0.111 9: src0.xyz = temp[12], src1.xyz = const[0] MAD temp[15].xyz, src0.xyz, src1.yyy, -src0.111 10: src0.xyz = temp[17], src1.xyz = temp[18] MAD temp[21].xyz, src0.xyz, src1.xxx, src0.000 11: src0.xyz = temp[16], src1.xyz = temp[15] DP3, src0.xyz, src1.xyz DP3 temp[20].w, src0._, src0._ 12: src0.xyz = temp[21], src0.w = temp[20] MAD color[0].xyz, src0.xyz, src0.www, src0.000 MAD color[0].w, src0.0, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[2] DP3, src0.xyz, src0.xyz DP3 temp[0].w, src0._, src0._ 1: src0.w = temp[0] MAD temp[1].z, src0.__w, src0.__1, src0.__0 2: BEGIN_TEX; 3: TEX temp[3].xyz, input[1].xy__, 2D[0]; 4: TEX temp[2].xyz, input[2].xyz_, CUBE[2]; 5: TEX temp[4].xyz, input[1].xy__, 2D[1]; 6: TEX temp[1].x, temp[1].z___, 1D[3] SEM_WAIT SEM_ACQUIRE; 7: src0.xyz = temp[3], src1.xyz = input[0] SEM_WAIT MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 8: src0.xyz = temp[2], src1.xyz = const[0] MAD temp[2].xyz, src0.xyz, src1.yyy, -src0.111 9: src0.xyz = temp[4], src1.xyz = const[0] MAD temp[3].xyz, src0.xyz, src1.yyy, -src0.111 10: src0.xyz = temp[0], src1.xyz = temp[1] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.000 11: src0.xyz = temp[2], src1.xyz = temp[3] DP3, src0.xyz, src1.xyz DP3 temp[0].w, src0._, src0._ 12: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.www, src0.000 MAD color[0].w, src0.0, src0.1, src0.0 pc=8************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 1, tex_end: 0 (code_addr: 00000040) 0: xyz: t2 t0 t0 bias-> (00000002) w: t0 t0 t0 bias-> t0.w (00800000) xyz: t2.xyz t2.xyz t2.xxx op: 00804000 w: 1.0 1.0 t2.x op: 00800891 1: xyz: t0 t0 t0 bias-> t1.z (02040000) w: t0 t0 t0 bias-> (00000000) xyz: t0.www 1.0 0.0 op: 00050a8c w: t0.x t0.x t0.x op: 00000000 NODE 1: alu_offset: 2, tex_offset: 0, alu_end: 5, tex_end: 3 (code_addr: 00460142) TEX: TEX t3, t1, texture[0] (000080c1) TEX t2, t2, texture[2] (00009082) TEX t4, t1, texture[1] (00008901) TEX t1, t1, texture[3] (00009841) 2: xyz: t3 t0 t0 bias-> t0.xyz (03800003) w: t0 t0 t0 bias-> (00000000) xyz: t3.xyz t0.xyz 0.0 op: 00050200 w: t3.x t3.x t3.x op: 00000000 3: xyz: t2 c0 t0 bias-> t2.xyz (03880802) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz c0.yyy -1.0 op: 000d4300 w: t2.x t2.x t2.x op: 00000000 4: xyz: t4 c0 t0 bias-> t3.xyz (038c0804) w: t0 t0 t0 bias-> (00000000) xyz: t4.xyz c0.yyy -1.0 op: 000d4300 w: t4.x t4.x t4.x op: 00000000 5: xyz: t0 t1 t0 bias-> t0.xyz (03800040) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t1.xxx 0.0 op: 00050280 w: t0.x t0.x t0.x op: 00000000 6: xyz: t2 t3 t0 bias-> (000000c2) w: t0 t0 t0 bias-> t0.w (00800000) xyz: t2.xyz t3.xyz t2.xxx op: 00804200 w: 1.0 1.0 t2.x op: 00800891 7: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz t0.www 0.0 op: 00050600 w: 0.0 1.0 0.0 op: 00040890 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL CONST[0..7] DCL TEMP[0..3] 0: MOV TEMP[0].xyz, IN[3] 1: MUL TEMP[1].xyz, IN[1].zxyw, TEMP[0].yzxw 2: MAD TEMP[0].xyz, IN[1].yzxw, TEMP[0].zxyw, -TEMP[1] 3: MUL TEMP[1].xyz, TEMP[0], IN[3].wwww 4: ADD TEMP[0].xyz, -IN[0], CONST[0] 5: MUL TEMP[2].xyz, TEMP[0], CONST[1] 6: ADD TEMP[3].xyz, -IN[0], CONST[2] 7: DP3 TEMP[0].w, TEMP[3], TEMP[3] 8: RSQ TEMP[0].w, TEMP[0].wwww 9: DP3 TEMP[0].y, TEMP[2], TEMP[1] 10: MUL TEMP[3].xyz, TEMP[0].wwww, TEMP[3] 11: DP3 TEMP[1].y, TEMP[1], TEMP[3] 12: DP3 TEMP[0].x, TEMP[2], IN[3] 13: DP3 TEMP[0].z, IN[1], TEMP[2] 14: DP3 TEMP[0].w, TEMP[0], TEMP[0] 15: RSQ TEMP[0].w, TEMP[0].wwww 16: DP3 TEMP[1].x, IN[3], TEMP[3] 17: DP3 TEMP[1].z, IN[1], TEMP[3] 18: MAD OUT[4].xyz, TEMP[0], TEMP[0].wwww, TEMP[1] 19: MOV OUT[3].xyz, TEMP[0] 20: MOV OUT[1], CONST[3] 21: MOV OUT[2].xyz, IN[2] 22: DP4 OUT[0].w, IN[0], CONST[4] 23: DP4 OUT[0].z, IN[0], CONST[5] 24: DP4 OUT[0].y, IN[0], CONST[6] 25: DP4 OUT[0].x, IN[0], CONST[7] 26: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[0].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: MUL temp[1].xyz, temp[0], input[3].wwww; 4: ADD temp[0].xyz, -input[0], const[0]; 5: MUL temp[2].xyz, temp[0], const[1]; 6: ADD temp[3].xyz, -input[0], const[2]; 7: DP3 temp[0].w, temp[3], temp[3]; 8: RSQ temp[0].w, temp[0].wwww; 9: DP3 temp[0].y, temp[2], temp[1]; 10: MUL temp[3].xyz, temp[0].wwww, temp[3]; 11: DP3 temp[1].y, temp[1], temp[3]; 12: DP3 temp[0].x, temp[2], input[3]; 13: DP3 temp[0].z, input[1], temp[2]; 14: DP3 temp[0].w, temp[0], temp[0]; 15: RSQ temp[0].w, temp[0].wwww; 16: DP3 temp[1].x, input[3], temp[3]; 17: DP3 temp[1].z, input[1], temp[3]; 18: MAD output[4].xyz, temp[0], temp[0].wwww, temp[1]; 19: MOV output[3].xyz, temp[0]; 20: MOV output[1], const[3]; 21: MOV output[2].xyz, input[2]; 22: DP4 temp[4].w, input[0], const[4]; 23: DP4 temp[4].z, input[0], const[5]; 24: DP4 temp[4].y, input[0], const[6]; 25: DP4 temp[4].x, input[0], const[7]; 26: MOV output[0], temp[4]; 27: MOV output[5], temp[4]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[0].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: MUL temp[1].xyz, temp[0], input[3].wwww; 4: ADD temp[0].xyz, -input[0], const[0]; 5: MUL temp[2].xyz, temp[0], const[1]; 6: ADD temp[3].xyz, -input[0], const[2]; 7: DP3 temp[0].w, temp[3], temp[3]; 8: RSQ temp[0].w, temp[0].wwww; 9: DP3 temp[0].y, temp[2], temp[1]; 10: MUL temp[3].xyz, temp[0].wwww, temp[3]; 11: DP3 temp[1].y, temp[1], temp[3]; 12: DP3 temp[0].x, temp[2], input[3]; 13: DP3 temp[0].z, input[1], temp[2]; 14: DP3 temp[0].w, temp[0], temp[0]; 15: RSQ temp[0].w, temp[0].wwww; 16: DP3 temp[1].x, input[3], temp[3]; 17: DP3 temp[1].z, input[1], temp[3]; 18: MAD output[4].xyz, temp[0], temp[0].wwww, temp[1]; 19: MOV output[3].xyz, temp[0]; 20: MOV output[1], const[3]; 21: MOV output[2].xyz, input[2]; 22: DP4 temp[4].w, input[0], const[4]; 23: DP4 temp[4].z, input[0], const[5]; 24: DP4 temp[4].y, input[0], const[6]; 25: DP4 temp[4].x, input[0], const[7]; 26: MOV output[0], temp[4]; 27: MOV output[5], temp[4]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[0].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: MUL temp[1].xyz, temp[0], input[3].wwww; 4: ADD temp[0].xyz, -input[0], const[0]; 5: MUL temp[2].xyz, temp[0], const[1]; 6: ADD temp[3].xyz, -input[0], const[2]; 7: DP3 temp[0].w, temp[3], temp[3]; 8: RSQ temp[0].w, temp[0].wwww; 9: DP3 temp[0].y, temp[2], temp[1]; 10: MUL temp[3].xyz, temp[0].wwww, temp[3]; 11: DP3 temp[1].y, temp[1], temp[3]; 12: DP3 temp[0].x, temp[2], input[3]; 13: DP3 temp[0].z, input[1], temp[2]; 14: DP3 temp[0].w, temp[0], temp[0]; 15: RSQ temp[0].w, temp[0].wwww; 16: DP3 temp[1].x, input[3], temp[3]; 17: DP3 temp[1].z, input[1], temp[3]; 18: MAD output[4].xyz, temp[0], temp[0].wwww, temp[1]; 19: MOV output[3].xyz, temp[0]; 20: MOV output[1], const[3]; 21: MOV output[2].xyz, input[2]; 22: DP4 temp[4].w, input[0], const[4]; 23: DP4 temp[4].z, input[0], const[5]; 24: DP4 temp[4].y, input[0], const[6]; 25: DP4 temp[4].x, input[0], const[7]; 26: MOV output[0], temp[4]; 27: MOV output[5], temp[4]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[0].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: MUL temp[1].xyz, temp[0], input[3].wwww; 4: ADD temp[0].xyz, -input[0], const[0]; 5: MUL temp[2].xyz, temp[0], const[1]; 6: ADD temp[3].xyz, -input[0], const[2]; 7: DP3 temp[0].w, temp[3], temp[3]; 8: RSQ temp[0].w, temp[0].wwww; 9: DP3 temp[0].y, temp[2], temp[1]; 10: MUL temp[3].xyz, temp[0].wwww, temp[3]; 11: DP3 temp[1].y, temp[1], temp[3]; 12: DP3 temp[0].x, temp[2], input[3]; 13: DP3 temp[0].z, input[1], temp[2]; 14: DP3 temp[0].w, temp[0], temp[0]; 15: RSQ temp[0].w, temp[0].wwww; 16: DP3 temp[1].x, input[3], temp[3]; 17: DP3 temp[1].z, input[1], temp[3]; 18: MAD output[4].xyz, temp[0], temp[0].wwww, temp[1]; 19: MOV output[3].xyz, temp[0]; 20: MOV output[1], const[3]; 21: MOV output[2].xyz, input[2]; 22: DP4 temp[4].w, input[0], const[4]; 23: DP4 temp[4].z, input[0], const[5]; 24: DP4 temp[4].y, input[0], const[6]; 25: DP4 temp[4].x, input[0], const[7]; 26: MOV output[0], temp[4]; 27: MOV output[5], temp[4]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[0].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: MUL temp[1].xyz, temp[0], input[3].wwww; 4: ADD temp[0].xyz, -input[0], const[0]; 5: MUL temp[2].xyz, temp[0], const[1]; 6: ADD temp[3].xyz, -input[0], const[2]; 7: DP4 temp[0].w, temp[3].xyz0, temp[3].xyz0; 8: RSQ temp[0].w, temp[0].wwww; 9: DP4 temp[0].y, temp[2].xyz0, temp[1].xyz0; 10: MUL temp[3].xyz, temp[0].wwww, temp[3]; 11: DP4 temp[1].y, temp[1].xyz0, temp[3].xyz0; 12: DP4 temp[0].x, temp[2].xyz0, input[3].xyz0; 13: DP4 temp[0].z, input[1].xyz0, temp[2].xyz0; 14: DP4 temp[0].w, temp[0].xyz0, temp[0].xyz0; 15: RSQ temp[0].w, temp[0].wwww; 16: DP4 temp[1].x, input[3].xyz0, temp[3].xyz0; 17: DP4 temp[1].z, input[1].xyz0, temp[3].xyz0; 18: MAD output[4].xyz, temp[0], temp[0].wwww, temp[1]; 19: MOV output[3].xyz, temp[0]; 20: MOV output[1], const[3]; 21: MOV output[2].xyz, input[2]; 22: DP4 temp[4].w, input[0], const[4]; 23: DP4 temp[4].z, input[0], const[5]; 24: DP4 temp[4].y, input[0], const[6]; 25: DP4 temp[4].x, input[0], const[7]; 26: MOV output[0], temp[4]; 27: MOV output[5], temp[4]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3]; 1: MUL temp[1].xyz, input[1].zxyw, temp[0].yzxw; 2: MAD temp[0].xyz, input[1].yzxw, temp[0].zxyw, -temp[1]; 3: MUL temp[1].xyz, temp[0], input[3].wwww; 4: ADD temp[0].xyz, -input[0], const[0]; 5: MUL temp[2].xyz, temp[0], const[1]; 6: ADD temp[3].xyz, -input[0], const[2]; 7: DP4 temp[0].w, temp[3].xyz0, temp[3].xyz0; 8: RSQ temp[0].w, temp[0].wwww; 9: DP4 temp[0].y, temp[2].xyz0, temp[1].xyz0; 10: MUL temp[3].xyz, temp[0].wwww, temp[3]; 11: DP4 temp[1].y, temp[1].xyz0, temp[3].xyz0; 12: DP4 temp[0].x, temp[2].xyz0, input[3].xyz0; 13: DP4 temp[0].z, input[1].xyz0, temp[2].xyz0; 14: DP4 temp[0].w, temp[0].xyz0, temp[0].xyz0; 15: RSQ temp[0].w, temp[0].wwww; 16: DP4 temp[1].x, input[3].xyz0, temp[3].xyz0; 17: DP4 temp[1].z, input[1].xyz0, temp[3].xyz0; 18: MAD output[4].xyz, temp[0], temp[0].wwww, temp[1]; 19: MOV output[3].xyz, temp[0]; 20: MOV output[1], const[3]; 21: MOV output[2].xyz, input[2]; 22: DP4 temp[4].w, input[0], const[4]; 23: DP4 temp[4].z, input[0], const[5]; 24: DP4 temp[4].y, input[0], const[6]; 25: DP4 temp[4].x, input[0], const[7]; 26: MOV output[0], temp[4]; 27: MOV output[5], temp[4]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0].xyz, input[3].xyz_; 1: MUL temp[1].xyz, input[1].zxy_, temp[0].yzx_; 2: MAD temp[0].xyz, input[1].yzx_, temp[0].zxy_, -temp[1].xyz_; 3: MUL temp[1].xyz, temp[0].xyz_, input[3].www_; 4: ADD temp[0].xyz, -input[0].xyz_, const[0].xyz_; 5: MUL temp[2].xyz, temp[0].xyz_, const[1].xyz_; 6: ADD temp[3].xyz, -input[0].xyz_, const[2].xyz_; 7: DP4 temp[0].w, temp[3].xyz0, temp[3].xyz0; 8: RSQ temp[0].w, temp[0].___w; 9: DP4 temp[0].y, temp[2].xyz0, temp[1].xyz0; 10: MUL temp[3].xyz, temp[0].www_, temp[3].xyz_; 11: DP4 temp[1].y, temp[1].xyz0, temp[3].xyz0; 12: DP4 temp[0].x, temp[2].xyz0, input[3].xyz0; 13: DP4 temp[0].z, input[1].xyz0, temp[2].xyz0; 14: DP4 temp[0].w, temp[0].xyz0, temp[0].xyz0; 15: RSQ temp[0].w, temp[0].___w; 16: DP4 temp[1].x, input[3].xyz0, temp[3].xyz0; 17: DP4 temp[1].z, input[1].xyz0, temp[3].xyz0; 18: MAD output[4].xyz, temp[0].xyz_, temp[0].www_, temp[1].xyz_; 19: MOV output[3].xyz, temp[0].xyz_; 20: MOV output[1], const[3]; 21: MOV output[2].xyz, input[2].xyz_; 22: DP4 temp[4].w, input[0], const[4]; 23: DP4 temp[4].z, input[0], const[5]; 24: DP4 temp[4].y, input[0], const[6]; 25: DP4 temp[4].x, input[0], const[7]; 26: MOV output[0], temp[4]; 27: MOV output[5], temp[4]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[1].xyz, input[1].zxy_, input[3].yzx_; 1: MAD temp[0].xyz, input[1].yzx_, input[3].zxy_, -temp[1].xyz_; 2: MUL temp[1].xyz, temp[0].xyz_, input[3].www_; 3: ADD temp[0].xyz, -input[0].xyz_, const[0].xyz_; 4: MUL temp[2].xyz, temp[0].xyz_, const[1].xyz_; 5: ADD temp[3].xyz, -input[0].xyz_, const[2].xyz_; 6: DP4 temp[0].w, temp[3].xyz0, temp[3].xyz0; 7: RSQ temp[0].w, temp[0].___w; 8: DP4 temp[0].y, temp[2].xyz0, temp[1].xyz0; 9: MUL temp[3].xyz, temp[0].www_, temp[3].xyz_; 10: DP4 temp[1].y, temp[1].xyz0, temp[3].xyz0; 11: DP4 temp[0].x, temp[2].xyz0, input[3].xyz0; 12: DP4 temp[0].z, input[1].xyz0, temp[2].xyz0; 13: DP4 temp[0].w, temp[0].xyz0, temp[0].xyz0; 14: RSQ temp[0].w, temp[0].___w; 15: DP4 temp[1].x, input[3].xyz0, temp[3].xyz0; 16: DP4 temp[1].z, input[1].xyz0, temp[3].xyz0; 17: MAD output[4].xyz, temp[0].xyz_, temp[0].www_, temp[1].xyz_; 18: MOV output[3].xyz, temp[0].xyz_; 19: MOV output[1], const[3]; 20: MOV output[2].xyz, input[2].xyz_; 21: DP4 temp[4].w, input[0], const[4]; 22: DP4 temp[4].z, input[0], const[5]; 23: DP4 temp[4].y, input[0], const[6]; 24: DP4 temp[4].x, input[0], const[7]; 25: MOV output[0], temp[4]; 26: MOV output[5], temp[4]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV temp[5], input[3].yzx_; 1: MUL temp[1].xyz, input[1].zxy_, temp[5]; 2: MOV temp[6], input[3].zxy_; 3: MAD temp[0].xyz, input[1].yzx_, temp[6], -temp[1].xyz_; 4: MUL temp[1].xyz, temp[0].xyz_, input[3].www_; 5: ADD temp[0].xyz, -input[0].xyz_, const[0].xyz_; 6: MUL temp[2].xyz, temp[0].xyz_, const[1].xyz_; 7: ADD temp[3].xyz, -input[0].xyz_, const[2].xyz_; 8: DP4 temp[0].w, temp[3].xyz0, temp[3].xyz0; 9: RSQ temp[0].w, temp[0].___w; 10: DP4 temp[0].y, temp[2].xyz0, temp[1].xyz0; 11: MUL temp[3].xyz, temp[0].www_, temp[3].xyz_; 12: DP4 temp[1].y, temp[1].xyz0, temp[3].xyz0; 13: DP4 temp[0].x, temp[2].xyz0, input[3].xyz0; 14: DP4 temp[0].z, input[1].xyz0, temp[2].xyz0; 15: DP4 temp[0].w, temp[0].xyz0, temp[0].xyz0; 16: RSQ temp[0].w, temp[0].___w; 17: DP4 temp[1].x, input[3].xyz0, temp[3].xyz0; 18: DP4 temp[1].z, input[1].xyz0, temp[3].xyz0; 19: MAD output[4].xyz, temp[0].xyz_, temp[0].www_, temp[1].xyz_; 20: MOV output[3].xyz, temp[0].xyz_; 21: MOV output[1], const[3]; 22: MOV output[2].xyz, input[2].xyz_; 23: DP4 temp[4].w, input[0], const[4]; 24: DP4 temp[4].z, input[0], const[5]; 25: DP4 temp[4].y, input[0], const[6]; 26: DP4 temp[4].x, input[0], const[7]; 27: MOV output[0], temp[4]; 28: MOV output[5], temp[4]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV temp[0], input[3].yzx_; 1: MUL temp[0].xyz, input[1].zxy_, temp[0]; 2: MOV temp[1], input[3].zxy_; 3: MAD temp[1].xyz, input[1].yzx_, temp[1], -temp[0].xyz_; 4: MUL temp[0].xyz, temp[1].xyz_, input[3].www_; 5: ADD temp[1].xyz, -input[0].xyz_, const[0].xyz_; 6: MUL temp[2].xyz, temp[1].xyz_, const[1].xyz_; 7: ADD temp[3].xyz, -input[0].xyz_, const[2].xyz_; 8: DP4 temp[1].w, temp[3].xyz0, temp[3].xyz0; 9: RSQ temp[1].w, temp[1].___w; 10: DP4 temp[1].y, temp[2].xyz0, temp[0].xyz0; 11: MUL temp[3].xyz, temp[1].www_, temp[3].xyz_; 12: DP4 temp[0].y, temp[0].xyz0, temp[3].xyz0; 13: DP4 temp[1].x, temp[2].xyz0, input[3].xyz0; 14: DP4 temp[1].z, input[1].xyz0, temp[2].xyz0; 15: DP4 temp[1].w, temp[1].xyz0, temp[1].xyz0; 16: RSQ temp[1].w, temp[1].___w; 17: DP4 temp[0].x, input[3].xyz0, temp[3].xyz0; 18: DP4 temp[0].z, input[1].xyz0, temp[3].xyz0; 19: MAD output[4].xyz, temp[1].xyz_, temp[1].www_, temp[0].xyz_; 20: MOV output[3].xyz, temp[1].xyz_; 21: MOV output[1], const[3]; 22: MOV output[2].xyz, input[2].xyz_; 23: DP4 temp[0].w, input[0], const[4]; 24: DP4 temp[0].z, input[0], const[5]; 25: DP4 temp[0].y, input[0], const[6]; 26: DP4 temp[0].x, input[0], const[7]; 27: MOV output[0], temp[0]; 28: MOV output[5], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV temp[0], input[3].yzx_; 1: MUL temp[0].xyz, input[1].zxy_, temp[0]; 2: MOV temp[1], input[3].zxy_; 3: MAD temp[1].xyz, input[1].yzx_, temp[1], -temp[0].xyz_; 4: MUL temp[0].xyz, temp[1].xyz_, input[3].www_; 5: ADD temp[1].xyz, -input[0].xyz_, const[0].xyz_; 6: MUL temp[2].xyz, temp[1].xyz_, const[1].xyz_; 7: ADD temp[3].xyz, -input[0].xyz_, const[2].xyz_; 8: DP4 temp[1].w, temp[3].xyz0, temp[3].xyz0; 9: RSQ temp[1].w, temp[1].___w; 10: DP4 temp[1].y, temp[2].xyz0, temp[0].xyz0; 11: MUL temp[3].xyz, temp[1].www_, temp[3].xyz_; 12: DP4 temp[0].y, temp[0].xyz0, temp[3].xyz0; 13: DP4 temp[1].x, temp[2].xyz0, input[3].xyz0; 14: DP4 temp[1].z, input[1].xyz0, temp[2].xyz0; 15: DP4 temp[1].w, temp[1].xyz0, temp[1].xyz0; 16: RSQ temp[1].w, temp[1].___w; 17: DP4 temp[0].x, input[3].xyz0, temp[3].xyz0; 18: DP4 temp[0].z, input[1].xyz0, temp[3].xyz0; 19: MAD output[4].xyz, temp[1].xyz_, temp[1].www_, temp[0].xyz_; 20: MOV output[3].xyz, temp[1].xyz_; 21: MOV output[1], const[3]; 22: MOV output[2].xyz, input[2].xyz_; 23: DP4 temp[0].w, input[0], const[4]; 24: DP4 temp[0].z, input[0], const[5]; 25: DP4 temp[0].y, input[0], const[6]; 26: DP4 temp[0].x, input[0], const[7]; 27: MOV output[0], temp[0]; 28: MOV output[5], temp[0]; Final vertex program code: 0: op: 0x00f00003 dst: 0t op: VE_ADD src0: 0x01c22061 reg: 3i swiz: Y/ Z/ X/ U src1: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 1: op: 0x00700002 dst: 0t op: VE_MULTIPLY src0: 0x01c84021 reg: 1i swiz: Z/ X/ Y/ U src1: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 2: op: 0x00f02003 dst: 1t op: VE_ADD src0: 0x01c84061 reg: 3i swiz: Z/ X/ Y/ U src1: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 3: op: 0x00702004 dst: 1t op: VE_MULTIPLY_ADD src0: 0x01c22021 reg: 1i swiz: Y/ Z/ X/ U src1: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src2: 0x1fd10000 reg: 0t swiz: -X/-Y/-Z/-U 4: op: 0x00700002 dst: 0t op: VE_MULTIPLY src0: 0x01d10020 reg: 1t swiz: X/ Y/ Z/ U src1: 0x01db6061 reg: 3i swiz: W/ W/ W/ U src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 5: op: 0x00702003 dst: 1t op: VE_ADD src0: 0x1fd10001 reg: 0i swiz: -X/-Y/-Z/-U src1: 0x01d10002 reg: 0c swiz: X/ Y/ Z/ U src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 6: op: 0x00704002 dst: 2t op: VE_MULTIPLY src0: 0x01d10020 reg: 1t swiz: X/ Y/ Z/ U src1: 0x01d10022 reg: 1c swiz: X/ Y/ Z/ U src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 7: op: 0x00706003 dst: 3t op: VE_ADD src0: 0x1fd10001 reg: 0i swiz: -X/-Y/-Z/-U src1: 0x01d10042 reg: 2c swiz: X/ Y/ Z/ U src2: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 8: op: 0x00802001 dst: 1t op: VE_DOT_PRODUCT src0: 0x01110060 reg: 3t swiz: X/ Y/ Z/ 0 src1: 0x01110060 reg: 3t swiz: X/ Y/ Z/ 0 src2: 0x01248060 reg: 3t swiz: 0/ 0/ 0/ 0 9: op: 0x00802048 dst: 1t op: ME_RECIP_SQRT_DX src0: 0x00db6020 reg: 1t swiz: W/ W/ W/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 10: op: 0x00202001 dst: 1t op: VE_DOT_PRODUCT src0: 0x01110040 reg: 2t swiz: X/ Y/ Z/ 0 src1: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 11: op: 0x00706002 dst: 3t op: VE_MULTIPLY src0: 0x01db6020 reg: 1t swiz: W/ W/ W/ U src1: 0x01d10060 reg: 3t swiz: X/ Y/ Z/ U src2: 0x01248060 reg: 3t swiz: 0/ 0/ 0/ 0 12: op: 0x00200001 dst: 0t op: VE_DOT_PRODUCT src0: 0x01110000 reg: 0t swiz: X/ Y/ Z/ 0 src1: 0x01110060 reg: 3t swiz: X/ Y/ Z/ 0 src2: 0x01248060 reg: 3t swiz: 0/ 0/ 0/ 0 13: op: 0x00102001 dst: 1t op: VE_DOT_PRODUCT src0: 0x01110040 reg: 2t swiz: X/ Y/ Z/ 0 src1: 0x01110061 reg: 3i swiz: X/ Y/ Z/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 14: op: 0x00402001 dst: 1t op: VE_DOT_PRODUCT src0: 0x01110021 reg: 1i swiz: X/ Y/ Z/ 0 src1: 0x01110040 reg: 2t swiz: X/ Y/ Z/ 0 src2: 0x01248040 reg: 2t swiz: 0/ 0/ 0/ 0 15: op: 0x00802001 dst: 1t op: VE_DOT_PRODUCT src0: 0x01110020 reg: 1t swiz: X/ Y/ Z/ 0 src1: 0x01110020 reg: 1t swiz: X/ Y/ Z/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 16: op: 0x00802048 dst: 1t op: ME_RECIP_SQRT_DX src0: 0x00db6020 reg: 1t swiz: W/ W/ W/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 17: op: 0x00100001 dst: 0t op: VE_DOT_PRODUCT src0: 0x01110061 reg: 3i swiz: X/ Y/ Z/ 0 src1: 0x01110060 reg: 3t swiz: X/ Y/ Z/ 0 src2: 0x01248060 reg: 3t swiz: 0/ 0/ 0/ 0 18: op: 0x00400001 dst: 0t op: VE_DOT_PRODUCT src0: 0x01110021 reg: 1i swiz: X/ Y/ Z/ 0 src1: 0x01110060 reg: 3t swiz: X/ Y/ Z/ 0 src2: 0x01248060 reg: 3t swiz: 0/ 0/ 0/ 0 19: op: 0x00708204 dst: 4o op: VE_MULTIPLY_ADD src0: 0x01d10020 reg: 1t swiz: X/ Y/ Z/ U src1: 0x01db6020 reg: 1t swiz: W/ W/ W/ U src2: 0x01d10000 reg: 0t swiz: X/ Y/ Z/ U 20: op: 0x00706203 dst: 3o op: VE_ADD src0: 0x01d10020 reg: 1t swiz: X/ Y/ Z/ U src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 21: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src1: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 src2: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 22: op: 0x00704203 dst: 2o op: VE_ADD src0: 0x01d10041 reg: 2i swiz: X/ Y/ Z/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 23: op: 0x00800001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src2: 0x01248082 reg: 4c swiz: 0/ 0/ 0/ 0 24: op: 0x00400001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src2: 0x012480a2 reg: 5c swiz: 0/ 0/ 0/ 0 25: op: 0x00200001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src2: 0x012480c2 reg: 6c swiz: 0/ 0/ 0/ 0 26: op: 0x00100001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src2: 0x012480e2 reg: 7c swiz: 0/ 0/ 0/ 0 27: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 28: op: 0x00f0a203 dst: 5o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL IN[3], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[3] DCL SAMP[6] DCL TEMP[0..3] IMM FLT32 { 2.0000, 1.0000, 16.0000, 0.0000} 0: TEX TEMP[0].xyz, IN[1], SAMP[0], 2D 1: TEX TEMP[1].xyz, IN[1], SAMP[1], 2D 2: TEX TEMP[2].xyz, IN[1], SAMP[6], 2D 3: DP3 TEMP[0].w, IN[2], IN[2] 4: DP3 TEMP[2].w, IN[3], IN[3] 5: RSQ TEMP[2].w, TEMP[2].wwww 6: MUL TEMP[3].yzw, TEMP[2].wwww, IN[3].xxyz 7: RSQ TEMP[2].w, TEMP[0].wwww 8: MAD TEMP[1].xyz, TEMP[1], IMM[0].xxxx, -IMM[0].yyyy 9: MUL TEMP[0].xyz, TEMP[0], IN[0] 10: TEX TEMP[3].x, TEMP[0].wwww, SAMP[3], 1D 11: DP3_SAT TEMP[0].w, TEMP[1], TEMP[3].yzww 12: MUL TEMP[3].yzw, TEMP[2].wwww, IN[2].xxyz 13: POW TEMP[2].w, TEMP[0].wwww, IMM[0].zzzz 14: MUL TEMP[2].w, IN[0], TEMP[2] 15: DP3 TEMP[0].w, TEMP[3].yzww, TEMP[1] 16: MUL TEMP[2].xyz, TEMP[2].wwww, TEMP[2] 17: MAD TEMP[0].xyz, TEMP[0], TEMP[0].wwww, TEMP[2] 18: MUL OUT[0].xyz, TEMP[0], TEMP[3].xxxx 19: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[1], 2D[1]; 2: TEX temp[2].xyz, input[1], 2D[6]; 3: DP3 temp[0].w, input[2], input[2]; 4: DP3 temp[2].w, input[3], input[3]; 5: RSQ temp[2].w, temp[2].wwww; 6: MUL temp[3].yzw, temp[2].wwww, input[3].xxyz; 7: RSQ temp[2].w, temp[0].wwww; 8: MAD temp[1].xyz, temp[1], const[0].xxxx, -const[0].yyyy; 9: MUL temp[0].xyz, temp[0], input[0]; 10: TEX temp[3].x, temp[0].wwww, 1D[3]; 11: DP3_SAT temp[0].w, temp[1], temp[3].yzww; 12: MUL temp[3].yzw, temp[2].wwww, input[2].xxyz; 13: POW temp[2].w, temp[0].wwww, const[0].zzzz; 14: MUL temp[2].w, input[0], temp[2]; 15: DP3 temp[0].w, temp[3].yzww, temp[1]; 16: MUL temp[2].xyz, temp[2].wwww, temp[2]; 17: MAD temp[0].xyz, temp[0], temp[0].wwww, temp[2]; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[1], 2D[1]; 2: TEX temp[2].xyz, input[1], 2D[6]; 3: DP3 temp[0].w, input[2], input[2]; 4: DP3 temp[2].w, input[3], input[3]; 5: RSQ temp[2].w, temp[2].wwww; 6: MUL temp[3].yzw, temp[2].wwww, input[3].xxyz; 7: RSQ temp[2].w, temp[0].wwww; 8: MAD temp[1].xyz, temp[1], const[0].xxxx, -const[0].yyyy; 9: MUL temp[0].xyz, temp[0], input[0]; 10: TEX temp[3].x, temp[0].wwww, 1D[3]; 11: DP3_SAT temp[0].w, temp[1], temp[3].yzww; 12: MUL temp[3].yzw, temp[2].wwww, input[2].xxyz; 13: POW temp[2].w, temp[0].wwww, const[0].zzzz; 14: MUL temp[2].w, input[0], temp[2]; 15: DP3 temp[0].w, temp[3].yzww, temp[1]; 16: MUL temp[2].xyz, temp[2].wwww, temp[2]; 17: MAD temp[0].xyz, temp[0], temp[0].wwww, temp[2]; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[1], 2D[1]; 2: TEX temp[2].xyz, input[1], 2D[6]; 3: DP3 temp[0].w, input[2], input[2]; 4: DP3 temp[2].w, input[3], input[3]; 5: RSQ temp[2].w, temp[2].wwww; 6: MUL temp[3].yzw, temp[2].wwww, input[3].xxyz; 7: RSQ temp[2].w, temp[0].wwww; 8: MAD temp[1].xyz, temp[1], const[0].xxxx, -const[0].yyyy; 9: MUL temp[0].xyz, temp[0], input[0]; 10: TEX temp[3].x, temp[0].wwww, 1D[3]; 11: DP3_SAT temp[0].w, temp[1], temp[3].yzww; 12: MUL temp[3].yzw, temp[2].wwww, input[2].xxyz; 13: POW temp[2].w, temp[0].wwww, const[0].zzzz; 14: MUL temp[2].w, input[0], temp[2]; 15: DP3 temp[0].w, temp[3].yzww, temp[1]; 16: MUL temp[2].xyz, temp[2].wwww, temp[2]; 17: MAD temp[0].xyz, temp[0], temp[0].wwww, temp[2]; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[1], 2D[1]; 2: TEX temp[2].xyz, input[1], 2D[6]; 3: DP3 temp[0].w, input[2], input[2]; 4: DP3 temp[2].w, input[3], input[3]; 5: RSQ temp[2].w, temp[2].wwww; 6: MUL temp[3].yzw, temp[2].wwww, input[3].xxyz; 7: RSQ temp[2].w, temp[0].wwww; 8: MAD temp[1].xyz, temp[1], const[0].xxxx, -const[0].yyyy; 9: MUL temp[0].xyz, temp[0], input[0]; 10: TEX temp[3].x, temp[0].wwww, 1D[3]; 11: DP3_SAT temp[0].w, temp[1], temp[3].yzww; 12: MUL temp[3].yzw, temp[2].wwww, input[2].xxyz; 13: POW temp[2].w, temp[0].wwww, const[0].zzzz; 14: MUL temp[2].w, input[0], temp[2]; 15: DP3 temp[0].w, temp[3].yzww, temp[1]; 16: MUL temp[2].xyz, temp[2].wwww, temp[2]; 17: MAD temp[0].xyz, temp[0], temp[0].wwww, temp[2]; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].xyz, input[1], 2D[1]; 2: TEX temp[2].xyz, input[1], 2D[6]; 3: DP3 temp[0].w, input[2], input[2]; 4: DP3 temp[2].w, input[3], input[3]; 5: RSQ temp[2].w, temp[2].wwww; 6: MUL temp[3].yzw, temp[2].wwww, input[3].xxyz; 7: RSQ temp[2].w, temp[0].wwww; 8: MAD temp[1].xyz, temp[1], const[0].xxxx, -const[0].yyyy; 9: MUL temp[0].xyz, temp[0], input[0]; 10: TEX temp[3].x, temp[0].wwww, 1D[3]; 11: DP3_SAT temp[0].w, temp[1], temp[3].yzww; 12: MUL temp[3].yzw, temp[2].wwww, input[2].xxyz; 13: POW temp[2].w, temp[0].wwww, const[0].zzzz; 14: MUL temp[2].w, input[0], temp[2]; 15: DP3 temp[0].w, temp[3].yzww, temp[1]; 16: MUL temp[2].xyz, temp[2].wwww, temp[2]; 17: MAD temp[0].xyz, temp[0], temp[0].wwww, temp[2]; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[4], input[1], 2D[0]; 1: MOV temp[0].xyz, temp[4]; 2: TEX temp[5], input[1], 2D[1]; 3: MOV temp[1].xyz, temp[5]; 4: TEX temp[6], input[1], 2D[6]; 5: MOV temp[2].xyz, temp[6]; 6: DP3 temp[0].w, input[2], input[2]; 7: DP3 temp[2].w, input[3], input[3]; 8: RSQ temp[2].w, temp[2].wwww; 9: MUL temp[3].yzw, temp[2].wwww, input[3].xxyz; 10: RSQ temp[2].w, temp[0].wwww; 11: MAD temp[1].xyz, temp[1], const[0].xxxx, -const[0].yyyy; 12: MUL temp[0].xyz, temp[0], input[0]; 13: TEX temp[7], temp[0].wwww, 1D[3]; 14: MOV temp[3].x, temp[7]; 15: DP3_SAT temp[0].w, temp[1], temp[3].yzww; 16: MUL temp[3].yzw, temp[2].wwww, input[2].xxyz; 17: POW temp[2].w, temp[0].wwww, const[0].zzzz; 18: MUL temp[2].w, input[0], temp[2]; 19: DP3 temp[0].w, temp[3].yzww, temp[1]; 20: MUL temp[2].xyz, temp[2].wwww, temp[2]; 21: MAD temp[0].xyz, temp[0], temp[0].wwww, temp[2]; 22: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[4], input[1], 2D[0]; 1: MOV temp[0].xyz, temp[4]; 2: TEX temp[5], input[1], 2D[1]; 3: MOV temp[1].xyz, temp[5]; 4: TEX temp[6], input[1], 2D[6]; 5: MOV temp[2].xyz, temp[6]; 6: DP3 temp[0].w, input[2], input[2]; 7: DP3 temp[2].w, input[3], input[3]; 8: RSQ temp[2].w, |temp[2].wwww|; 9: MUL temp[3].yzw, temp[2].wwww, input[3].xxyz; 10: RSQ temp[2].w, |temp[0].wwww|; 11: MAD temp[1].xyz, temp[1], const[0].xxxx, -const[0].yyyy; 12: MUL temp[0].xyz, temp[0], input[0]; 13: TEX temp[7], temp[0].wwww, 1D[3]; 14: MOV temp[3].x, temp[7]; 15: DP3_SAT temp[0].w, temp[1], temp[3].yzww; 16: MUL temp[3].yzw, temp[2].wwww, input[2].xxyz; 17: LG2 temp[2].w, temp[0].wwww; 18: MUL temp[2].w, temp[2].wwww, const[0].zzzz; 19: EX2 temp[2].w, temp[2].wwww; 20: MUL temp[2].w, input[0], temp[2]; 21: DP3 temp[0].w, temp[3].yzww, temp[1]; 22: MUL temp[2].xyz, temp[2].wwww, temp[2]; 23: MAD temp[0].xyz, temp[0], temp[0].wwww, temp[2]; 24: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[4].xyz, input[1].xy__, 2D[0]; 1: MOV temp[0].xyz, temp[4].xyz_; 2: TEX temp[5].xyz, input[1].xy__, 2D[1]; 3: MOV temp[1].xyz, temp[5].xyz_; 4: TEX temp[6].xyz, input[1].xy__, 2D[6]; 5: MOV temp[2].xyz, temp[6].xyz_; 6: DP3 temp[0].w, input[2].xyz_, input[2].xyz_; 7: DP3 temp[2].w, input[3].xyz_, input[3].xyz_; 8: RSQ temp[2].w, |temp[2].___w|; 9: MUL temp[3].yzw, temp[2]._www, input[3]._xyz; 10: RSQ temp[2].w, |temp[0].___w|; 11: MAD temp[1].xyz, temp[1].xyz_, const[0].xxx_, -const[0].yyy_; 12: MUL temp[0].xyz, temp[0].xyz_, input[0].xyz_; 13: TEX temp[7].x, temp[0].w___, 1D[3]; 14: MOV temp[3].x, temp[7].x___; 15: DP3_SAT temp[0].w, temp[1].xyz_, temp[3].yzw_; 16: MUL temp[3].yzw, temp[2]._www, input[2]._xyz; 17: LG2 temp[2].w, temp[0].___w; 18: MUL temp[2].w, temp[2].___w, const[0].___z; 19: EX2 temp[2].w, temp[2].___w; 20: MUL temp[2].w, input[0].___w, temp[2].___w; 21: DP3 temp[0].w, temp[3].yzw_, temp[1].xyz_; 22: MUL temp[2].xyz, temp[2].www_, temp[2].xyz_; 23: MAD temp[0].xyz, temp[0].xyz_, temp[0].www_, temp[2].xyz_; 24: MUL output[0].xyz, temp[0].xyz_, temp[3].xxx_; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[4].xyz, input[1].xy__, 2D[0]; 1: MOV temp[0].xyz, temp[4].xyz_; 2: TEX temp[5].xyz, input[1].xy__, 2D[1]; 3: MOV temp[1].xyz, temp[5].xyz_; 4: TEX temp[6].xyz, input[1].xy__, 2D[6]; 5: MOV temp[2].xyz, temp[6].xyz_; 6: DP3 temp[0].w, input[2].xyz_, input[2].xyz_; 7: DP3 temp[2].w, input[3].xyz_, input[3].xyz_; 8: RSQ temp[2].w, |temp[2].___w|; 9: MUL temp[3].yzw, temp[2]._www, input[3]._xyz; 10: RSQ temp[2].w, |temp[0].___w|; 11: MAD temp[1].xyz, temp[1].xyz_, const[0].xxx_, -const[0].yyy_; 12: MUL temp[0].xyz, temp[0].xyz_, input[0].xyz_; 13: TEX temp[7].x, temp[0].w___, 1D[3]; 14: MOV temp[3].x, temp[7].x___; 15: DP3_SAT temp[0].w, temp[1].xyz_, temp[3].yzw_; 16: MUL temp[3].yzw, temp[2]._www, input[2]._xyz; 17: LG2 temp[2].w, temp[0].___w; 18: MUL temp[2].w, temp[2].___w, const[0].___z; 19: EX2 temp[2].w, temp[2].___w; 20: MUL temp[2].w, input[0].___w, temp[2].___w; 21: DP3 temp[0].w, temp[3].yzw_, temp[1].xyz_; 22: MUL temp[2].xyz, temp[2].www_, temp[2].xyz_; 23: MAD temp[0].xyz, temp[0].xyz_, temp[0].www_, temp[2].xyz_; 24: MUL output[0].xyz, temp[0].xyz_, temp[3].xxx_; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: MOV temp[9].xyz, temp[8].xyz_; 2: TEX temp[10].xyz, input[1].xy__, 2D[1]; 3: MOV temp[11].xyz, temp[10].xyz_; 4: TEX temp[12].xyz, input[1].xy__, 2D[6]; 5: MOV temp[13].xyz, temp[12].xyz_; 6: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; 7: DP3 temp[15].w, input[3].xyz_, input[3].xyz_; 8: RSQ temp[16].w, |temp[15].___w|; 9: MUL temp[17].yzw, temp[16]._www, input[3]._xyz; 10: RSQ temp[18].w, |temp[14].___w|; 11: MAD temp[19].xyz, temp[11].xyz_, const[0].xxx_, -const[0].yyy_; 12: MUL temp[20].xyz, temp[9].xyz_, input[0].xyz_; 13: TEX temp[21].x, temp[14].w___, 1D[3]; 14: MOV temp[22].x, temp[21].x___; 15: DP3_SAT temp[23].w, temp[19].xyz_, temp[17].yzw_; 16: MUL temp[24].yzw, temp[18]._www, input[2]._xyz; 17: LG2 temp[25].w, temp[23].___w; 18: MUL temp[26].w, temp[25].___w, const[0].___z; 19: EX2 temp[27].w, temp[26].___w; 20: MUL temp[28].w, input[0].___w, temp[27].___w; 21: DP3 temp[29].w, temp[24].yzw_, temp[19].xyz_; 22: MUL temp[30].xyz, temp[28].www_, temp[13].xyz_; 23: MAD temp[31].xyz, temp[20].xyz_, temp[29].www_, temp[30].xyz_; 24: MUL output[0].xyz, temp[31].xyz_, temp[22].xxx_; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: TEX temp[10].xyz, input[1].xy__, 2D[1]; 2: TEX temp[12].xyz, input[1].xy__, 2D[6]; 3: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; 4: DP3 temp[15].w, input[3].xyz_, input[3].xyz_; 5: RSQ temp[16].w, |temp[15].___w|; 6: MUL temp[17].yzw, temp[16]._www, input[3]._xyz; 7: RSQ temp[18].w, |temp[14].___w|; 8: MAD temp[19].xyz, temp[10].xyz_, const[0].xxx_, -none.111_; 9: MUL temp[20].xyz, temp[8].xyz_, input[0].xyz_; 10: TEX temp[21].x, temp[14].w___, 1D[3]; 11: DP3_SAT temp[23].w, temp[19].xyz_, temp[17].yzw_; 12: MUL temp[24].yzw, temp[18]._www, input[2]._xyz; 13: LG2 temp[25].w, temp[23].___w; 14: MUL temp[26].w, temp[25].___w, const[0].___z; 15: EX2 temp[27].w, temp[26].___w; 16: MUL temp[28].w, input[0].___w, temp[27].___w; 17: DP3 temp[29].w, temp[24].yzw_, temp[19].xyz_; 18: MUL temp[30].xyz, temp[28].www_, temp[12].xyz_; 19: MAD temp[31].xyz, temp[20].xyz_, temp[29].www_, temp[30].xyz_; 20: MUL output[0].xyz, temp[31].xyz_, temp[21].xxx_; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: TEX temp[10].xyz, input[1].xy__, 2D[1]; 2: TEX temp[12].xyz, input[1].xy__, 2D[6]; 3: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; 4: DP3 temp[15].w, input[3].xyz_, input[3].xyz_; 5: RSQ temp[16].w, |temp[15].___w|; 6: MUL temp[17].yzw, temp[16]._www, input[3]._xyz; 7: RSQ temp[18].w, |temp[14].___w|; 8: MAD temp[19].xyz, temp[10].xyz_, const[0].xxx_, -none.111_; 9: MUL temp[20].xyz, temp[8].xyz_, input[0].xyz_; 10: MOV temp[0].x, temp[14].w___; 11: TEX temp[21].x, temp[0].x___, 1D[3]; 12: MOV temp[1].xy, temp[17].yz__; 13: MOV temp[1].z, temp[17].__w_; 14: DP3_SAT temp[23].w, temp[19].xyz_, temp[1].xyz_; 15: MUL temp[24].yzw, temp[18]._www, input[2]._xyz; 16: LG2 temp[25].w, temp[23].___w; 17: MUL temp[26].w, temp[25].___w, const[0].___z; 18: EX2 temp[27].w, temp[26].___w; 19: MUL temp[28].w, input[0].___w, temp[27].___w; 20: MOV temp[2].xy, temp[24].yz__; 21: MOV temp[2].z, temp[24].__w_; 22: DP3 temp[29].w, temp[2].xyz_, temp[19].xyz_; 23: MUL temp[30].xyz, temp[28].www_, temp[12].xyz_; 24: MAD temp[31].xyz, temp[20].xyz_, temp[29].www_, temp[30].xyz_; 25: MUL output[0].xyz, temp[31].xyz_, temp[21].xxx_; CONST[0] = { 2.0000 1.0000 16.0000 0.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: TEX temp[10].xyz, input[1].xy__, 2D[1]; 2: TEX temp[12].xyz, input[1].xy__, 2D[6]; 3: DP3 temp[14].w, input[2].xyz_, input[2].xyz_; 4: DP3 temp[15].w, input[3].xyz_, input[3].xyz_; 5: RSQ temp[16].w, |temp[15].___w|; 6: MUL temp[17].yzw, temp[16]._www, input[3]._xyz; 7: RSQ temp[18].w, |temp[14].___w|; 8: MAD temp[19].xyz, temp[10].xyz_, const[0].xxx_, -none.111_; 9: MUL temp[20].xyz, temp[8].xyz_, input[0].xyz_; 10: MOV temp[0].x, temp[14].w___; 11: TEX temp[21].x, temp[0].x___, 1D[3]; 12: MOV temp[1].xy, temp[17].yz__; 13: MOV temp[1].z, temp[17].__w_; 14: DP3_SAT temp[23].w, temp[19].xyz_, temp[1].xyz_; 15: MUL temp[24].yzw, temp[18]._www, input[2]._xyz; 16: LG2 temp[25].w, temp[23].___w; 17: MUL temp[26].w, temp[25].___w, const[0].___z; 18: EX2 temp[27].w, temp[26].___w; 19: MUL temp[28].w, input[0].___w, temp[27].___w; 20: MOV temp[2].xy, temp[24].yz__; 21: MOV temp[2].z, temp[24].__w_; 22: DP3 temp[29].w, temp[2].xyz_, temp[19].xyz_; 23: MUL temp[30].xyz, temp[28].www_, temp[12].xyz_; 24: MAD temp[31].xyz, temp[20].xyz_, temp[29].www_, temp[30].xyz_; 25: MUL output[0].xyz, temp[31].xyz_, temp[21].xxx_; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[8].xyz, input[1].xy__, 2D[0]; 1: TEX temp[10].xyz, input[1].xy__, 2D[1]; 2: TEX temp[12].xyz, input[1].xy__, 2D[6]; 3: src0.xyz = input[2] DP3, src0.xyz, src0.xyz DP3 temp[14].w, src0._, src0._ 4: src0.xyz = input[3] DP3, src0.xyz, src0.xyz DP3 temp[15].w, src0._, src0._ 5: src0.w = temp[15] RSQ temp[16].w, |src0.w| 6: src0.xyz = input[3], src0.w = temp[16] MAD temp[17].yz, src0._ww, src0._xy, src0.000 MAD temp[17].w, src0.w, src0.z, src0.0 7: src0.w = temp[14] RSQ temp[18].w, |src0.w| 8: src0.xyz = temp[10], src1.xyz = const[0] MAD temp[19].xyz, src0.xyz, src1.xxx, -src0.111 9: src0.xyz = temp[8], src1.xyz = input[0] MAD temp[20].xyz, src0.xyz, src1.xyz, src0.000 10: src0.w = temp[14] MAD temp[0].x, src0.w__, src0.111, src0.000 11: TEX temp[21].x, temp[0].x___, 1D[3]; 12: src0.xyz = temp[17] MAD temp[1].xy, src0.yz_, src0.111, src0.000 13: src0.w = temp[17] MAD temp[1].z, src0.__w, src0.111, src0.000 14: src0.xyz = temp[19], src1.xyz = temp[1] DP3_SAT, src0.xyz, src1.xyz DP3_SAT temp[23].w, src0._, src0._ 15: src0.xyz = input[2], src0.w = temp[18] MAD temp[24].yz, src0._ww, src0._xy, src0.000 MAD temp[24].w, src0.w, src0.z, src0.0 16: src0.w = temp[23] LG2 temp[25].w, src0.w 17: src0.xyz = const[0], src0.w = temp[25] MAD temp[26].w, src0.w, src0.z, src0.0 18: src0.w = temp[26] EX2 temp[27].w, src0.w 19: src0.w = input[0], src1.w = temp[27] MAD temp[28].w, src0.w, src1.w, src0.0 20: src0.xyz = temp[24] MAD temp[2].xy, src0.yz_, src0.111, src0.000 21: src0.w = temp[24] MAD temp[2].z, src0.__w, src0.111, src0.000 22: src0.xyz = temp[2], src1.xyz = temp[19] DP3, src0.xyz, src1.xyz DP3 temp[29].w, src0._, src0._ 23: src0.xyz = temp[12], src0.w = temp[28] MAD temp[30].xyz, src0.www, src0.xyz, src0.000 24: src0.xyz = temp[20], src0.w = temp[29], src1.xyz = temp[30] MAD temp[31].xyz, src0.xyz, src0.www, src1.xyz 25: src0.xyz = temp[31], src1.xyz = temp[21] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[2] DP3, src0.xyz, src0.xyz DP3 temp[14].w, src0._, src0._ 1: src0.w = temp[14] MAD temp[0].x, src0.w__, src0.111, src0.000 RSQ temp[18].w, |src0.w| 2: BEGIN_TEX; 3: TEX temp[8].xyz, input[1].xy__, 2D[0]; 4: TEX temp[10].xyz, input[1].xy__, 2D[1]; 5: TEX temp[12].xyz, input[1].xy__, 2D[6]; 6: TEX temp[21].x, temp[0].x___, 1D[3] SEM_WAIT SEM_ACQUIRE; 7: src0.xyz = input[3] DP3, src0.xyz, src0.xyz DP3 temp[15].w, src0._, src0._ 8: src0.xyz = temp[8], src0.w = temp[15], src1.xyz = input[0] SEM_WAIT MAD temp[20].xyz, src0.xyz, src1.xyz, src0.000 RSQ temp[16].w, |src0.w| 9: src0.xyz = input[2], src0.w = temp[18] MAD temp[24].yz, src0._ww, src0._xy, src0.000 MAD temp[24].w, src0.w, src0.z, src0.0 10: src0.xyz = temp[10], src1.xyz = const[0] MAD temp[19].xyz, src0.xyz, src1.xxx, -src0.111 11: src0.xyz = input[3], src0.w = temp[16] MAD temp[17].yz, src0._ww, src0._xy, src0.000 MAD temp[17].w, src0.w, src0.z, src0.0 12: src0.xyz = temp[24] MAD temp[2].xy, src0.yz_, src0.111, src0.000 13: src0.w = temp[24] MAD temp[2].z, src0.__w, src0.111, src0.000 14: src0.xyz = temp[17] MAD temp[1].xy, src0.yz_, src0.111, src0.000 15: src0.xyz = temp[2], src1.xyz = temp[19] DP3, src0.xyz, src1.xyz DP3 temp[29].w, src0._, src0._ 16: src0.w = temp[17] MAD temp[1].z, src0.__w, src0.111, src0.000 17: src0.xyz = temp[19], src1.xyz = temp[1] DP3_SAT, src0.xyz, src1.xyz DP3_SAT temp[23].w, src0._, src0._ 18: src0.w = temp[23] LG2 temp[25].w, src0.w 19: src0.xyz = const[0], src0.w = temp[25] MAD temp[26].w, src0.w, src0.z, src0.0 20: src0.w = temp[26] EX2 temp[27].w, src0.w 21: src0.w = input[0], src1.w = temp[27] MAD temp[28].w, src0.w, src1.w, src0.0 22: src0.xyz = temp[12], src0.w = temp[28] MAD temp[30].xyz, src0.www, src0.xyz, src0.000 23: src0.xyz = temp[20], src0.w = temp[29], src1.xyz = temp[30] MAD temp[31].xyz, src0.xyz, src0.www, src1.xyz 24: src0.xyz = temp[31], src1.xyz = temp[21] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[2] DP3, src0.xyz, src0.xyz DP3 temp[14].w, src0._, src0._ 1: src0.w = temp[14] MAD temp[0].x, src0.w__, src0.111, src0.000 RSQ temp[18].w, |src0.w| 2: BEGIN_TEX; 3: TEX temp[8].xyz, input[1].xy__, 2D[0]; 4: TEX temp[10].xyz, input[1].xy__, 2D[1]; 5: TEX temp[12].xyz, input[1].xy__, 2D[6]; 6: TEX temp[21].x, temp[0].x___, 1D[3] SEM_WAIT SEM_ACQUIRE; 7: src0.xyz = input[3] DP3, src0.xyz, src0.xyz DP3 temp[15].w, src0._, src0._ 8: src0.xyz = temp[8], src0.w = temp[15], src1.xyz = input[0] SEM_WAIT MAD temp[20].xyz, src0.xyz, src1.xyz, src0.000 RSQ temp[16].w, |src0.w| 9: src0.xyz = input[2], src0.w = temp[18] MAD temp[24].yz, src0._ww, src0._xy, src0.000 MAD temp[24].w, src0.w, src0.z, src0.0 10: src0.xyz = temp[10], src1.xyz = const[0] MAD temp[19].xyz, src0.xyz, src1.xxx, -src0.111 11: src0.xyz = input[3], src0.w = temp[16] MAD temp[17].yz, src0._ww, src0._xy, src0.000 MAD temp[17].w, src0.w, src0.z, src0.0 12: src0.xyz = temp[24] MAD temp[2].xy, src0.yz_, src0.111, src0.000 13: src0.w = temp[24] MAD temp[2].z, src0.__w, src0.111, src0.000 14: src0.xyz = temp[17] MAD temp[1].xy, src0.yz_, src0.111, src0.000 15: src0.xyz = temp[2], src1.xyz = temp[19] DP3, src0.xyz, src1.xyz DP3 temp[29].w, src0._, src0._ 16: src0.w = temp[17] MAD temp[1].z, src0.__w, src0.111, src0.000 17: src0.xyz = temp[19], src1.xyz = temp[1] DP3_SAT, src0.xyz, src1.xyz DP3_SAT temp[23].w, src0._, src0._ 18: src0.w = temp[23] LG2 temp[25].w, src0.w 19: src0.xyz = const[0], src0.w = temp[25] MAD temp[26].w, src0.w, src0.z, src0.0 20: src0.w = temp[26] EX2 temp[27].w, src0.w 21: src0.w = input[0], src1.w = temp[27] MAD temp[28].w, src0.w, src1.w, src0.0 22: src0.xyz = temp[12], src0.w = temp[28] MAD temp[30].xyz, src0.www, src0.xyz, src0.000 23: src0.xyz = temp[20], src0.w = temp[29], src1.xyz = temp[30] MAD temp[31].xyz, src0.xyz, src0.www, src1.xyz 24: src0.xyz = temp[31], src1.xyz = temp[21] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[2] DP3, src0.xyz, src0.xyz DP3 temp[1].w, src0._, src0._ 1: src0.w = temp[1] MAD temp[1].z, src0.__w, src0.__1, src0.__0 RSQ temp[1].w, |src0.w| 2: BEGIN_TEX; 3: TEX temp[4].xyz, input[1].xy__, 2D[0]; 4: TEX temp[5].xyz, input[1].xy__, 2D[1]; 5: TEX temp[6].xyz, input[1].xy__, 2D[6]; 6: TEX temp[7].x, temp[1].z___, 1D[3] SEM_WAIT SEM_ACQUIRE; 7: src0.xyz = input[3] DP3, src0.xyz, src0.xyz DP3 temp[2].w, src0._, src0._ 8: src0.xyz = temp[4], src0.w = temp[2], src1.xyz = input[0] SEM_WAIT MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 RSQ temp[2].w, |src0.w| 9: src0.xyz = input[2], src0.w = temp[1] MAD temp[2].yz, src0._ww, src0._xy, src0._00 MAD temp[1].w, src0.w, src0.z, src0.0 10: src0.xyz = temp[5], src1.xyz = const[0] MAD temp[4].xyz, src0.xyz, src1.xxx, -src0.111 11: src0.xyz = input[3], src0.w = temp[2] MAD temp[3].yz, src0._ww, src0._xy, src0._00 MAD temp[2].w, src0.w, src0.z, src0.0 12: src0.xyz = temp[2] MAD temp[2].xy, src0.yz_, src0.111, src0.000 13: src0.w = temp[1] MAD temp[2].z, src0.__w, src0.111, src0.000 14: src0.xyz = temp[3] MAD temp[3].xy, src0.yz_, src0.111, src0.000 15: src0.xyz = temp[2], src1.xyz = temp[4] DP3, src0.xyz, src1.xyz DP3 temp[1].w, src0._, src0._ 16: src0.w = temp[2] MAD temp[3].z, src0.__w, src0.111, src0.000 17: src0.xyz = temp[4], src1.xyz = temp[3] DP3_SAT, src0.xyz, src1.xyz DP3_SAT temp[2].w, src0._, src0._ 18: src0.w = temp[2] LG2 temp[2].w, src0.w 19: src0.xyz = const[0], src0.w = temp[2] MAD temp[2].w, src0.w, src0.z, src0.0 20: src0.w = temp[2] EX2 temp[2].w, src0.w 21: src0.w = input[0], src1.w = temp[2] MAD temp[0].w, src0.w, src1.w, src0.0 22: src0.xyz = temp[6], src0.w = temp[0] MAD temp[0].xyz, src0.www, src0.xyz, src0.000 23: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src0.www, src1.xyz 24: src0.xyz = temp[0], src1.xyz = temp[7] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 r300compiler error: compiler/r300_fragprog_emit.c::emit_alu(): Too many ALU instructions r300 FP: Compiler Error: compiler/r300_fragprog_emit.c::emit_alu(): Too many ALU instructions Using a dummy shader instead. r300: Initial fragment program FRAG DCL OUT[0], COLOR IMM FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 pc=9************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: 0.0 1.0 0.0 op: 00050a94 w: 1.0 1.0 0.0 op: 00040891 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL IN[3], GENERIC[2], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[3] DCL TEMP[0..3] IMM FLT32 { 2.0000, 1.0000, 16.0000, 0.0000} 0: TEX TEMP[0], IN[1], SAMP[1], 2D 1: TEX TEMP[1].xyz, IN[1], SAMP[0], 2D 2: DP3 TEMP[2].x, IN[2], IN[2] 3: MAD TEMP[3].yzw, TEMP[0].xxyz, IMM[0].xxxx, -IMM[0].yyyy 4: DP3 TEMP[1].w, IN[3], IN[3] 5: RSQ TEMP[0].y, TEMP[1].wwww 6: RSQ TEMP[0].x, TEMP[2].xxxx 7: TEX TEMP[3].x, TEMP[2], SAMP[3], 1D 8: MUL TEMP[2].xyz, TEMP[0].yyyy, IN[3] 9: MUL TEMP[0].xyz, TEMP[0].xxxx, IN[2] 10: DP3_SAT TEMP[1].w, TEMP[3].yzww, TEMP[2] 11: DP3 TEMP[3].y, TEMP[0], TEMP[3].yzww 12: POW TEMP[0].x, TEMP[1].wwww, IMM[0].zzzz 13: MOV_SAT TEMP[0].y, TEMP[0].wwww 14: MUL TEMP[0].x, IN[0].wwww, TEMP[0] 15: MUL TEMP[0].w, TEMP[0].xxxx, TEMP[0].yyyy 16: MUL TEMP[0].xyz, TEMP[1], IN[0] 17: MAD TEMP[0].xyz, TEMP[0], TEMP[3].yyyy, TEMP[0].wwww 18: MUL OUT[0].xyz, TEMP[0], TEMP[3].xxxx 19: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[1]; 1: TEX temp[1].xyz, input[1], 2D[0]; 2: DP3 temp[2].x, input[2], input[2]; 3: MAD temp[3].yzw, temp[0].xxyz, const[0].xxxx, -const[0].yyyy; 4: DP3 temp[1].w, input[3], input[3]; 5: RSQ temp[0].y, temp[1].wwww; 6: RSQ temp[0].x, temp[2].xxxx; 7: TEX temp[3].x, temp[2], 1D[3]; 8: MUL temp[2].xyz, temp[0].yyyy, input[3]; 9: MUL temp[0].xyz, temp[0].xxxx, input[2]; 10: DP3_SAT temp[1].w, temp[3].yzww, temp[2]; 11: DP3 temp[3].y, temp[0], temp[3].yzww; 12: POW temp[0].x, temp[1].wwww, const[0].zzzz; 13: MOV_SAT temp[0].y, temp[0].wwww; 14: MUL temp[0].x, input[0].wwww, temp[0]; 15: MUL temp[0].w, temp[0].xxxx, temp[0].yyyy; 16: MUL temp[0].xyz, temp[1], input[0]; 17: MAD temp[0].xyz, temp[0], temp[3].yyyy, temp[0].wwww; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[1]; 1: TEX temp[1].xyz, input[1], 2D[0]; 2: DP3 temp[2].x, input[2], input[2]; 3: MAD temp[3].yzw, temp[0].xxyz, const[0].xxxx, -const[0].yyyy; 4: DP3 temp[1].w, input[3], input[3]; 5: RSQ temp[0].y, temp[1].wwww; 6: RSQ temp[0].x, temp[2].xxxx; 7: TEX temp[3].x, temp[2], 1D[3]; 8: MUL temp[2].xyz, temp[0].yyyy, input[3]; 9: MUL temp[0].xyz, temp[0].xxxx, input[2]; 10: DP3_SAT temp[1].w, temp[3].yzww, temp[2]; 11: DP3 temp[3].y, temp[0], temp[3].yzww; 12: POW temp[0].x, temp[1].wwww, const[0].zzzz; 13: MOV_SAT temp[0].y, temp[0].wwww; 14: MUL temp[0].x, input[0].wwww, temp[0]; 15: MUL temp[0].w, temp[0].xxxx, temp[0].yyyy; 16: MUL temp[0].xyz, temp[1], input[0]; 17: MAD temp[0].xyz, temp[0], temp[3].yyyy, temp[0].wwww; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[1]; 1: TEX temp[1].xyz, input[1], 2D[0]; 2: DP3 temp[2].x, input[2], input[2]; 3: MAD temp[3].yzw, temp[0].xxyz, const[0].xxxx, -const[0].yyyy; 4: DP3 temp[1].w, input[3], input[3]; 5: RSQ temp[0].y, temp[1].wwww; 6: RSQ temp[0].x, temp[2].xxxx; 7: TEX temp[3].x, temp[2], 1D[3]; 8: MUL temp[2].xyz, temp[0].yyyy, input[3]; 9: MUL temp[0].xyz, temp[0].xxxx, input[2]; 10: DP3_SAT temp[1].w, temp[3].yzww, temp[2]; 11: DP3 temp[3].y, temp[0], temp[3].yzww; 12: POW temp[0].x, temp[1].wwww, const[0].zzzz; 13: MOV_SAT temp[0].y, temp[0].wwww; 14: MUL temp[0].x, input[0].wwww, temp[0]; 15: MUL temp[0].w, temp[0].xxxx, temp[0].yyyy; 16: MUL temp[0].xyz, temp[1], input[0]; 17: MAD temp[0].xyz, temp[0], temp[3].yyyy, temp[0].wwww; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[1]; 1: TEX temp[1].xyz, input[1], 2D[0]; 2: DP3 temp[2].x, input[2], input[2]; 3: MAD temp[3].yzw, temp[0].xxyz, const[0].xxxx, -const[0].yyyy; 4: DP3 temp[1].w, input[3], input[3]; 5: RSQ temp[0].y, temp[1].wwww; 6: RSQ temp[0].x, temp[2].xxxx; 7: TEX temp[3].x, temp[2], 1D[3]; 8: MUL temp[2].xyz, temp[0].yyyy, input[3]; 9: MUL temp[0].xyz, temp[0].xxxx, input[2]; 10: DP3_SAT temp[1].w, temp[3].yzww, temp[2]; 11: DP3 temp[3].y, temp[0], temp[3].yzww; 12: POW temp[0].x, temp[1].wwww, const[0].zzzz; 13: MOV_SAT temp[0].y, temp[0].wwww; 14: MUL temp[0].x, input[0].wwww, temp[0]; 15: MUL temp[0].w, temp[0].xxxx, temp[0].yyyy; 16: MUL temp[0].xyz, temp[1], input[0]; 17: MAD temp[0].xyz, temp[0], temp[3].yyyy, temp[0].wwww; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[1]; 1: TEX temp[1].xyz, input[1], 2D[0]; 2: DP3 temp[2].x, input[2], input[2]; 3: MAD temp[3].yzw, temp[0].xxyz, const[0].xxxx, -const[0].yyyy; 4: DP3 temp[1].w, input[3], input[3]; 5: RSQ temp[0].y, temp[1].wwww; 6: RSQ temp[0].x, temp[2].xxxx; 7: TEX temp[3].x, temp[2], 1D[3]; 8: MUL temp[2].xyz, temp[0].yyyy, input[3]; 9: MUL temp[0].xyz, temp[0].xxxx, input[2]; 10: DP3_SAT temp[1].w, temp[3].yzww, temp[2]; 11: DP3 temp[3].y, temp[0], temp[3].yzww; 12: POW temp[0].x, temp[1].wwww, const[0].zzzz; 13: MOV_SAT temp[0].y, temp[0].wwww; 14: MUL temp[0].x, input[0].wwww, temp[0]; 15: MUL temp[0].w, temp[0].xxxx, temp[0].yyyy; 16: MUL temp[0].xyz, temp[1], input[0]; 17: MAD temp[0].xyz, temp[0], temp[3].yyyy, temp[0].wwww; 18: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[1]; 1: TEX temp[4], input[1], 2D[0]; 2: MOV temp[1].xyz, temp[4]; 3: DP3 temp[2].x, input[2], input[2]; 4: MAD temp[3].yzw, temp[0].xxyz, const[0].xxxx, -const[0].yyyy; 5: DP3 temp[1].w, input[3], input[3]; 6: RSQ temp[0].y, temp[1].wwww; 7: RSQ temp[0].x, temp[2].xxxx; 8: TEX temp[5], temp[2], 1D[3]; 9: MOV temp[3].x, temp[5]; 10: MUL temp[2].xyz, temp[0].yyyy, input[3]; 11: MUL temp[0].xyz, temp[0].xxxx, input[2]; 12: DP3_SAT temp[1].w, temp[3].yzww, temp[2]; 13: DP3 temp[3].y, temp[0], temp[3].yzww; 14: POW temp[0].x, temp[1].wwww, const[0].zzzz; 15: MOV_SAT temp[0].y, temp[0].wwww; 16: MUL temp[0].x, input[0].wwww, temp[0]; 17: MUL temp[0].w, temp[0].xxxx, temp[0].yyyy; 18: MUL temp[0].xyz, temp[1], input[0]; 19: MAD temp[0].xyz, temp[0], temp[3].yyyy, temp[0].wwww; 20: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[1]; 1: TEX temp[4], input[1], 2D[0]; 2: MOV temp[1].xyz, temp[4]; 3: DP3 temp[2].x, input[2], input[2]; 4: MAD temp[3].yzw, temp[0].xxyz, const[0].xxxx, -const[0].yyyy; 5: DP3 temp[1].w, input[3], input[3]; 6: RSQ temp[0].y, |temp[1].wwww|; 7: RSQ temp[0].x, |temp[2].xxxx|; 8: TEX temp[5], temp[2], 1D[3]; 9: MOV temp[3].x, temp[5]; 10: MUL temp[2].xyz, temp[0].yyyy, input[3]; 11: MUL temp[0].xyz, temp[0].xxxx, input[2]; 12: DP3_SAT temp[1].w, temp[3].yzww, temp[2]; 13: DP3 temp[3].y, temp[0], temp[3].yzww; 14: LG2 temp[0].w, temp[1].wwww; 15: MUL temp[0].w, temp[0].wwww, const[0].zzzz; 16: EX2 temp[0].x, temp[0].wwww; 17: MOV_SAT temp[0].y, temp[0].wwww; 18: MUL temp[0].x, input[0].wwww, temp[0]; 19: MUL temp[0].w, temp[0].xxxx, temp[0].yyyy; 20: MUL temp[0].xyz, temp[1], input[0]; 21: MAD temp[0].xyz, temp[0], temp[3].yyyy, temp[0].wwww; 22: MUL output[0].xyz, temp[0], temp[3].xxxx; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1].xy__, 2D[1]; 1: TEX temp[4].xyz, input[1].xy__, 2D[0]; 2: MOV temp[1].xyz, temp[4].xyz_; 3: DP3 temp[2].x, input[2].xyz_, input[2].xyz_; 4: MAD temp[3].yzw, temp[0]._xyz, const[0]._xxx, -const[0]._yyy; 5: DP3 temp[1].w, input[3].xyz_, input[3].xyz_; 6: RSQ temp[0].y, |temp[1]._w__|; 7: RSQ temp[0].x, |temp[2].x___|; 8: TEX temp[5].x, temp[2].x___, 1D[3]; 9: MOV temp[3].x, temp[5].x___; 10: MUL temp[2].xyz, temp[0].yyy_, input[3].xyz_; 11: MUL temp[0].xyz, temp[0].xxx_, input[2].xyz_; 12: DP3_SAT temp[1].w, temp[3].yzw_, temp[2].xyz_; 13: DP3 temp[3].y, temp[0].xyz_, temp[3].yzw_; 14: LG2 temp[0].w, temp[1].___w; 15: MUL temp[0].w, temp[0].___w, const[0].___z; 16: EX2 temp[0].x, temp[0].w___; 17: MOV_SAT temp[0].y, temp[0]._w__; 18: MUL temp[0].x, input[0].w___, temp[0].x___; 19: MUL temp[0].w, temp[0].___x, temp[0].___y; 20: MUL temp[0].xyz, temp[1].xyz_, input[0].xyz_; 21: MAD temp[0].xyz, temp[0].xyz_, temp[3].yyy_, temp[0].www_; 22: MUL output[0].xyz, temp[0].xyz_, temp[3].xxx_; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1].xy__, 2D[1]; 1: TEX temp[4].xyz, input[1].xy__, 2D[0]; 2: MOV temp[1].xyz, temp[4].xyz_; 3: DP3 temp[2].x, input[2].xyz_, input[2].xyz_; 4: MAD temp[3].yzw, temp[0]._xyz, const[0]._xxx, -const[0]._yyy; 5: DP3 temp[1].w, input[3].xyz_, input[3].xyz_; 6: RSQ temp[0].y, |temp[1]._w__|; 7: RSQ temp[0].x, |temp[2].x___|; 8: TEX temp[5].x, temp[2].x___, 1D[3]; 9: MOV temp[3].x, temp[5].x___; 10: MUL temp[2].xyz, temp[0].yyy_, input[3].xyz_; 11: MUL temp[0].xyz, temp[0].xxx_, input[2].xyz_; 12: DP3_SAT temp[1].w, temp[3].yzw_, temp[2].xyz_; 13: DP3 temp[3].y, temp[0].xyz_, temp[3].yzw_; 14: LG2 temp[0].w, temp[1].___w; 15: MUL temp[0].w, temp[0].___w, const[0].___z; 16: EX2 temp[0].x, temp[0].w___; 17: MOV_SAT temp[0].y, temp[0]._w__; 18: MUL temp[0].x, input[0].w___, temp[0].x___; 19: MUL temp[0].w, temp[0].___x, temp[0].___y; 20: MUL temp[0].xyz, temp[1].xyz_, input[0].xyz_; 21: MAD temp[0].xyz, temp[0].xyz_, temp[3].yyy_, temp[0].www_; 22: MUL output[0].xyz, temp[0].xyz_, temp[3].xxx_; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[6].xyz, input[1].xy__, 2D[1]; 1: TEX temp[7].xyz, input[1].xy__, 2D[0]; 2: MOV temp[8].xyz, temp[7].xyz_; 3: DP3 temp[9].x, input[2].xyz_, input[2].xyz_; 4: MAD temp[10].yzw, temp[6]._xyz, const[0]._xxx, -const[0]._yyy; 5: DP3 temp[11].w, input[3].xyz_, input[3].xyz_; 6: RSQ temp[12].y, |temp[11]._w__|; 7: RSQ temp[13].x, |temp[9].x___|; 8: TEX temp[14].x, temp[9].x___, 1D[3]; 9: MOV temp[15].x, temp[14].x___; 10: MUL temp[16].xyz, temp[12].yyy_, input[3].xyz_; 11: MUL temp[17].xyz, temp[13].xxx_, input[2].xyz_; 12: DP3_SAT temp[18].w, temp[10].yzw_, temp[16].xyz_; 13: DP3 temp[19].y, temp[17].xyz_, temp[10].yzw_; 14: LG2 temp[20].w, temp[18].___w; 15: MUL temp[21].w, temp[20].___w, const[0].___z; 16: EX2 temp[22].x, temp[21].w___; 17: MOV_SAT temp[23].y, temp[21]._w__; 18: MUL temp[24].x, input[0].w___, temp[22].x___; 19: MUL temp[25].w, temp[24].___x, temp[23].___y; 20: MUL temp[26].xyz, temp[8].xyz_, input[0].xyz_; 21: MAD temp[27].xyz, temp[26].xyz_, temp[19].yyy_, temp[25].www_; 22: MUL output[0].xyz, temp[27].xyz_, temp[15].xxx_; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[6].xyz, input[1].xy__, 2D[1]; 1: TEX temp[7].xyz, input[1].xy__, 2D[0]; 2: DP3 temp[9].x, input[2].xyz_, input[2].xyz_; 3: MAD temp[10].yzw, temp[6]._xyz, const[0]._xxx, -none._111; 4: DP3 temp[11].w, input[3].xyz_, input[3].xyz_; 5: RSQ temp[12].y, |temp[11]._w__|; 6: RSQ temp[13].x, |temp[9].x___|; 7: TEX temp[14].x, temp[9].x___, 1D[3]; 8: MUL temp[16].xyz, temp[12].yyy_, input[3].xyz_; 9: MUL temp[17].xyz, temp[13].xxx_, input[2].xyz_; 10: DP3_SAT temp[18].w, temp[10].yzw_, temp[16].xyz_; 11: DP3 temp[19].y, temp[17].xyz_, temp[10].yzw_; 12: LG2 temp[20].w, temp[18].___w; 13: MUL temp[21].w, temp[20].___w, const[0].___z; 14: EX2 temp[22].x, temp[21].w___; 15: MOV_SAT temp[23].y, temp[21]._w__; 16: MUL temp[24].x, input[0].w___, temp[22].x___; 17: MUL temp[25].w, temp[24].___x, temp[23].___y; 18: MUL temp[26].xyz, temp[7].xyz_, input[0].xyz_; 19: MAD temp[27].xyz, temp[26].xyz_, temp[19].yyy_, temp[25].www_; 20: MUL output[0].xyz, temp[27].xyz_, temp[14].xxx_; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[6].xyz, input[1].xy__, 2D[1]; 1: TEX temp[7].xyz, input[1].xy__, 2D[0]; 2: DP3 temp[9].x, input[2].xyz_, input[2].xyz_; 3: MAD temp[10].yzw, temp[6]._xyz, const[0]._xxx, -none._111; 4: DP3 temp[11].w, input[3].xyz_, input[3].xyz_; 5: RSQ temp[12].y, |temp[11]._w__|; 6: RSQ temp[13].x, |temp[9].x___|; 7: TEX temp[14].x, temp[9].x___, 1D[3]; 8: MUL temp[16].xyz, temp[12].yyy_, input[3].xyz_; 9: MUL temp[17].xyz, temp[13].xxx_, input[2].xyz_; 10: MOV temp[0].xy, temp[10].yz__; 11: MOV temp[0].z, temp[10].__w_; 12: DP3_SAT temp[18].w, temp[0].xyz_, temp[16].xyz_; 13: MOV temp[1].xy, temp[10].yz__; 14: MOV temp[1].z, temp[10].__w_; 15: DP3 temp[19].y, temp[17].xyz_, temp[1].xyz_; 16: LG2 temp[20].w, temp[18].___w; 17: MUL temp[21].w, temp[20].___w, const[0].___z; 18: EX2 temp[22].x, temp[21].w___; 19: MOV_SAT temp[23].y, temp[21]._w__; 20: MUL temp[24].x, input[0].w___, temp[22].x___; 21: MUL temp[25].w, temp[24].___x, temp[23].___y; 22: MUL temp[26].xyz, temp[7].xyz_, input[0].xyz_; 23: MAD temp[27].xyz, temp[26].xyz_, temp[19].yyy_, temp[25].www_; 24: MUL output[0].xyz, temp[27].xyz_, temp[14].xxx_; CONST[0] = { 2.0000 1.0000 16.0000 0.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[6].xyz, input[1].xy__, 2D[1]; 1: TEX temp[7].xyz, input[1].xy__, 2D[0]; 2: DP3 temp[9].x, input[2].xyz_, input[2].xyz_; 3: MAD temp[10].yzw, temp[6]._xyz, const[0]._xxx, -none._111; 4: DP3 temp[11].w, input[3].xyz_, input[3].xyz_; 5: RSQ temp[12].y, |temp[11]._w__|; 6: RSQ temp[13].x, |temp[9].x___|; 7: TEX temp[14].x, temp[9].x___, 1D[3]; 8: MUL temp[16].xyz, temp[12].yyy_, input[3].xyz_; 9: MUL temp[17].xyz, temp[13].xxx_, input[2].xyz_; 10: MOV temp[0].xy, temp[10].yz__; 11: MOV temp[0].z, temp[10].__w_; 12: DP3_SAT temp[18].w, temp[0].xyz_, temp[16].xyz_; 13: MOV temp[1].xy, temp[10].yz__; 14: MOV temp[1].z, temp[10].__w_; 15: DP3 temp[19].y, temp[17].xyz_, temp[1].xyz_; 16: LG2 temp[20].w, temp[18].___w; 17: MUL temp[21].w, temp[20].___w, const[0].___z; 18: EX2 temp[22].x, temp[21].w___; 19: MOV_SAT temp[23].y, temp[21]._w__; 20: MUL temp[24].x, input[0].w___, temp[22].x___; 21: MUL temp[25].w, temp[24].___x, temp[23].___y; 22: MUL temp[26].xyz, temp[7].xyz_, input[0].xyz_; 23: MAD temp[27].xyz, temp[26].xyz_, temp[19].yyy_, temp[25].www_; 24: MUL output[0].xyz, temp[27].xyz_, temp[14].xxx_; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[6].xyz, input[1].xy__, 2D[1]; 1: TEX temp[7].xyz, input[1].xy__, 2D[0]; 2: src0.xyz = input[2] DP3 temp[9].x, src0.xyz, src0.xyz 3: src0.xyz = temp[6], src1.xyz = const[0] MAD temp[10].yz, src0._xy, src1._xx, -src0._11 MAD temp[10].w, src0.z, src1.x, -src0.1 4: src0.xyz = input[3] DP3, src0.xyz, src0.xyz DP3 temp[11].w, src0._, src0._ 5: src0.w = temp[11] REPL_ALPHA temp[12].y RSQ, |src0.w| 6: src0.xyz = temp[9] REPL_ALPHA temp[13].x RSQ, |src0.x| 7: TEX temp[14].x, temp[9].x___, 1D[3]; 8: src0.xyz = temp[12], src1.xyz = input[3] MAD temp[16].xyz, src0.yyy, src1.xyz, src0.000 9: src0.xyz = temp[13], src1.xyz = input[2] MAD temp[17].xyz, src0.xxx, src1.xyz, src0.000 10: src0.xyz = temp[10] MAD temp[0].xy, src0.yz_, src0.111, src0.000 11: src0.w = temp[10] MAD temp[0].z, src0.__w, src0.111, src0.000 12: src0.xyz = temp[0], src1.xyz = temp[16] DP3_SAT, src0.xyz, src1.xyz DP3_SAT temp[18].w, src0._, src0._ 13: src0.xyz = temp[10] MAD temp[1].xy, src0.yz_, src0.111, src0.000 14: src0.w = temp[10] MAD temp[1].z, src0.__w, src0.111, src0.000 15: src0.xyz = temp[17], src1.xyz = temp[1] DP3 temp[19].y, src0.xyz, src1.xyz 16: src0.w = temp[18] LG2 temp[20].w, src0.w 17: src0.xyz = const[0], src0.w = temp[20] MAD temp[21].w, src0.w, src0.z, src0.0 18: src0.w = temp[21] REPL_ALPHA temp[22].x EX2, src0.w 19: src0.w = temp[21] MAD_SAT temp[23].y, src0._w_, src0.111, src0.000 20: src0.xyz = temp[22], src0.w = input[0] MAD temp[24].x, src0.w__, src0.x__, src0.000 21: src0.xyz = temp[24], src1.xyz = temp[23] MAD temp[25].w, src0.x, src1.y, src0.0 22: src0.xyz = temp[7], src1.xyz = input[0] MAD temp[26].xyz, src0.xyz, src1.xyz, src0.000 23: src0.xyz = temp[26], src0.w = temp[25], src1.xyz = temp[19] MAD temp[27].xyz, src0.xyz, src1.yyy, src0.www 24: src0.xyz = temp[27], src1.xyz = temp[14] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[3] DP3, src0.xyz, src0.xyz DP3 temp[11].w, src0._, src0._ 1: src0.xyz = input[2] DP3 temp[9].x, src0.xyz, src0.xyz 2: BEGIN_TEX; 3: TEX temp[6].xyz, input[1].xy__, 2D[1]; 4: TEX temp[7].xyz, input[1].xy__, 2D[0]; 5: TEX temp[14].x, temp[9].x___, 1D[3] SEM_WAIT SEM_ACQUIRE; 6: src0.xyz = temp[7], src0.w = temp[11], src1.xyz = input[0] SEM_WAIT MAD temp[26].xyz, src0.xyz, src1.xyz, src0.000 RSQ temp[12].w, |src0.w| 7: src0.xyz = temp[12], src0.w = temp[12], src1.xyz = input[3], src2.xyz = temp[9] MAD temp[16].xyz, src0.www, src1.xyz, src0.000 RSQ temp[13].w, |src2.x| 8: src0.xyz = temp[6], src1.xyz = const[0] MAD temp[10].yz, src0._xy, src1._xx, -src0._11 MAD temp[10].w, src0.z, src1.x, -src0.1 9: src0.xyz = temp[13], src0.w = temp[13], src1.xyz = input[2] MAD temp[17].xyz, src0.www, src1.xyz, src0.000 10: src0.xyz = temp[10] MAD temp[1].xy, src0.yz_, src0.111, src0.000 11: src0.xyz = temp[10] MAD temp[0].xy, src0.yz_, src0.111, src0.000 12: src0.w = temp[10] MAD temp[1].z, src0.__w, src0.111, src0.000 13: src0.w = temp[10] MAD temp[0].z, src0.__w, src0.111, src0.000 14: src0.xyz = temp[17], src1.xyz = temp[1] DP3 temp[19].y, src0.xyz, src1.xyz 15: src0.xyz = temp[0], src1.xyz = temp[16] DP3_SAT, src0.xyz, src1.xyz DP3_SAT temp[18].w, src0._, src0._ 16: src0.w = temp[18] LG2 temp[20].w, src0.w 17: src0.xyz = const[0], src0.w = temp[20] MAD temp[21].w, src0.w, src0.z, src0.0 18: src0.w = temp[21] REPL_ALPHA temp[22].x EX2, src0.w 19: src0.xyz = temp[22], src0.w = input[0], src1.w = temp[21] MAD temp[24].x, src0.w__, src0.x__, src0.000 MAD_SAT temp[23].w, src1.w, src0.1, src0.0 20: src0.xyz = temp[24], src0.w = temp[23], src1.xyz = temp[23] MAD temp[25].w, src0.x, src0.w, src0.0 21: src0.xyz = temp[26], src0.w = temp[25], src1.xyz = temp[19] MAD temp[27].xyz, src0.xyz, src1.yyy, src0.www 22: src0.xyz = temp[27], src1.xyz = temp[14] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[3] DP3, src0.xyz, src0.xyz DP3 temp[11].w, src0._, src0._ 1: src0.xyz = input[2] DP3 temp[9].x, src0.xyz, src0.xyz 2: BEGIN_TEX; 3: TEX temp[6].xyz, input[1].xy__, 2D[1]; 4: TEX temp[7].xyz, input[1].xy__, 2D[0]; 5: TEX temp[14].x, temp[9].x___, 1D[3] SEM_WAIT SEM_ACQUIRE; 6: src0.xyz = temp[7], src0.w = temp[11], src1.xyz = input[0] SEM_WAIT MAD temp[26].xyz, src0.xyz, src1.xyz, src0.000 RSQ temp[12].w, |src0.w| 7: src0.w = temp[12], src1.xyz = input[3], src2.xyz = temp[9] MAD temp[16].xyz, src0.www, src1.xyz, src0.000 RSQ temp[13].w, |src2.x| 8: src0.xyz = temp[6], src1.xyz = const[0] MAD temp[10].yz, src0._xy, src1._xx, -src0._11 MAD temp[10].w, src0.z, src1.x, -src0.1 9: src0.w = temp[13], src1.xyz = input[2] MAD temp[17].xyz, src0.www, src1.xyz, src0.000 10: src0.xyz = temp[10] MAD temp[1].xy, src0.yz_, src0.111, src0.000 11: src0.xyz = temp[10] MAD temp[0].xy, src0.yz_, src0.111, src0.000 12: src0.w = temp[10] MAD temp[1].z, src0.__w, src0.111, src0.000 13: src0.w = temp[10] MAD temp[0].z, src0.__w, src0.111, src0.000 14: src0.xyz = temp[17], src1.xyz = temp[1] DP3 temp[19].y, src0.xyz, src1.xyz 15: src0.xyz = temp[0], src1.xyz = temp[16] DP3_SAT, src0.xyz, src1.xyz DP3_SAT temp[18].w, src0._, src0._ 16: src0.w = temp[18] LG2 temp[20].w, src0.w 17: src0.xyz = const[0], src0.w = temp[20] MAD temp[21].w, src0.w, src0.z, src0.0 18: src0.w = temp[21] REPL_ALPHA temp[22].x EX2, src0.w 19: src0.xyz = temp[22], src0.w = input[0], src1.w = temp[21] MAD temp[24].x, src0.w__, src0.x__, src0.000 MAD_SAT temp[23].w, src1.w, src0.1, src0.0 20: src0.xyz = temp[24], src0.w = temp[23] MAD temp[25].w, src0.x, src0.w, src0.0 21: src0.xyz = temp[26], src0.w = temp[25], src1.xyz = temp[19] MAD temp[27].xyz, src0.xyz, src1.yyy, src0.www 22: src0.xyz = temp[27], src1.xyz = temp[14] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[3] DP3, src0.xyz, src0.xyz DP3 temp[1].w, src0._, src0._ 1: src0.xyz = input[2] DP3 temp[1].z, src0.xyz, src0.xyz 2: BEGIN_TEX; 3: TEX temp[4].xyz, input[1].xy__, 2D[1]; 4: TEX temp[5].xyz, input[1].xy__, 2D[0]; 5: TEX temp[6].x, temp[1].z___, 1D[3] SEM_WAIT SEM_ACQUIRE; 6: src0.xyz = temp[5], src0.w = temp[1], src1.xyz = input[0] SEM_WAIT MAD temp[5].xyz, src0.xyz, src1.xyz, src0.000 RSQ temp[1].w, |src0.w| 7: src0.w = temp[1], src1.xyz = input[3], src2.xyz = temp[1] MAD temp[1].xyz, src0.www, src1.xyz, src0.000 RSQ temp[1].w, |src2.z| 8: src0.xyz = temp[4], src1.xyz = const[0] MAD temp[3].yz, src0._xy, src1._xx, -src0._11 MAD temp[2].w, src0.z, src1.x, -src0.1 9: src0.w = temp[1], src1.xyz = input[2] MAD temp[2].xyz, src0.www, src1.xyz, src0.000 10: src0.xyz = temp[3] MAD temp[4].xy, src0.yz_, src0.111, src0.000 11: src0.xyz = temp[3] MAD temp[3].xy, src0.yz_, src0.111, src0.000 12: src0.w = temp[2] MAD temp[4].z, src0.__w, src0.111, src0.000 13: src0.w = temp[2] MAD temp[3].z, src0.__w, src0.111, src0.000 14: src0.xyz = temp[2], src1.xyz = temp[4] DP3 temp[2].x, src0.xyz, src1.xyz 15: src0.xyz = temp[3], src1.xyz = temp[1] DP3_SAT, src0.xyz, src1.xyz DP3_SAT temp[1].w, src0._, src0._ 16: src0.w = temp[1] LG2 temp[1].w, src0.w 17: src0.xyz = const[0], src0.w = temp[1] MAD temp[1].w, src0.w, src0.z, src0.0 18: src0.w = temp[1] REPL_ALPHA temp[1].x EX2, src0.w 19: src0.xyz = temp[1], src0.w = input[0], src1.w = temp[1] MAD temp[0].x, src0.w__, src0.x__, src0.0__ MAD_SAT temp[0].w, src1.w, src0.1, src0.0 20: src0.xyz = temp[0], src0.w = temp[0] MAD temp[0].w, src0.x, src0.w, src0.0 21: src0.xyz = temp[5], src0.w = temp[0], src1.xyz = temp[2] MAD temp[0].xyz, src0.xyz, src1.xxx, src0.www 22: src0.xyz = temp[0], src1.xyz = temp[6] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 r300compiler error: compiler/r300_fragprog_emit.c::emit_alu(): Too many ALU instructions r300 FP: Compiler Error: compiler/r300_fragprog_emit.c::emit_alu(): Too many ALU instructions Using a dummy shader instead. r300: Initial fragment program FRAG DCL OUT[0], COLOR IMM FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 pc=10************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: 0.0 1.0 0.0 op: 00050a94 w: 1.0 1.0 0.0 op: 00040891 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[2] DCL SAMP[3] DCL TEMP[0..1] IMM FLT32 { 0.0000, 2.0000, 1.0000, 0.0000} 0: TEX TEMP[0].xyz, IN[1], SAMP[0], 2D 1: TEX TEMP[1].z, IN[2], SAMP[2], CUBE 2: DP3 TEMP[1].x, IN[2], IN[2] 3: MOV OUT[0].w, IMM[0].xxxx 4: TEX TEMP[1].x, TEMP[1], SAMP[3], 1D 5: MUL TEMP[0].xyz, TEMP[1].xxxx, TEMP[0] 6: MAD TEMP[1].x, TEMP[1].zzzz, IMM[0].yyyy, -IMM[0].zzzz 7: MUL TEMP[0].xyz, TEMP[0], IN[0] 8: MUL OUT[0].xyz, TEMP[0], TEMP[1].xxxx 9: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].z, input[2], CUBE[2]; 2: DP3 temp[1].x, input[2], input[2]; 3: MOV output[0].w, const[0].xxxx; 4: TEX temp[1].x, temp[1], 1D[3]; 5: MUL temp[0].xyz, temp[1].xxxx, temp[0]; 6: MAD temp[1].x, temp[1].zzzz, const[0].yyyy, -const[0].zzzz; 7: MUL temp[0].xyz, temp[0], input[0]; 8: MUL output[0].xyz, temp[0], temp[1].xxxx; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].z, input[2], CUBE[2]; 2: DP3 temp[1].x, input[2], input[2]; 3: MOV output[0].w, const[0].xxxx; 4: TEX temp[1].x, temp[1], 1D[3]; 5: MUL temp[0].xyz, temp[1].xxxx, temp[0]; 6: MAD temp[1].x, temp[1].zzzz, const[0].yyyy, -const[0].zzzz; 7: MUL temp[0].xyz, temp[0], input[0]; 8: MUL output[0].xyz, temp[0], temp[1].xxxx; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].z, input[2], CUBE[2]; 2: DP3 temp[1].x, input[2], input[2]; 3: MOV output[0].w, const[0].xxxx; 4: TEX temp[1].x, temp[1], 1D[3]; 5: MUL temp[0].xyz, temp[1].xxxx, temp[0]; 6: MAD temp[1].x, temp[1].zzzz, const[0].yyyy, -const[0].zzzz; 7: MUL temp[0].xyz, temp[0], input[0]; 8: MUL output[0].xyz, temp[0], temp[1].xxxx; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].z, input[2], CUBE[2]; 2: DP3 temp[1].x, input[2], input[2]; 3: MOV output[0].w, const[0].xxxx; 4: TEX temp[1].x, temp[1], 1D[3]; 5: MUL temp[0].xyz, temp[1].xxxx, temp[0]; 6: MAD temp[1].x, temp[1].zzzz, const[0].yyyy, -const[0].zzzz; 7: MUL temp[0].xyz, temp[0], input[0]; 8: MUL output[0].xyz, temp[0], temp[1].xxxx; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0].xyz, input[1], 2D[0]; 1: TEX temp[1].z, input[2], CUBE[2]; 2: DP3 temp[1].x, input[2], input[2]; 3: MOV output[0].w, const[0].xxxx; 4: TEX temp[1].x, temp[1], 1D[3]; 5: MUL temp[0].xyz, temp[1].xxxx, temp[0]; 6: MAD temp[1].x, temp[1].zzzz, const[0].yyyy, -const[0].zzzz; 7: MUL temp[0].xyz, temp[0], input[0]; 8: MUL output[0].xyz, temp[0], temp[1].xxxx; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[2], input[1], 2D[0]; 1: MOV temp[0].xyz, temp[2]; 2: TEX temp[3], input[2], CUBE[2]; 3: MOV temp[1].z, temp[3]; 4: DP3 temp[1].x, input[2], input[2]; 5: MOV output[0].w, const[0].xxxx; 6: TEX temp[4], temp[1], 1D[3]; 7: MOV temp[1].x, temp[4]; 8: MUL temp[0].xyz, temp[1].xxxx, temp[0]; 9: MAD temp[1].x, temp[1].zzzz, const[0].yyyy, -const[0].zzzz; 10: MUL temp[0].xyz, temp[0], input[0]; 11: MUL output[0].xyz, temp[0], temp[1].xxxx; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[2], input[1], 2D[0]; 1: MOV temp[0].xyz, temp[2]; 2: TEX temp[3], input[2], CUBE[2]; 3: MOV temp[1].z, temp[3]; 4: DP3 temp[1].x, input[2], input[2]; 5: MOV output[0].w, const[0].xxxx; 6: TEX temp[4], temp[1], 1D[3]; 7: MOV temp[1].x, temp[4]; 8: MUL temp[0].xyz, temp[1].xxxx, temp[0]; 9: MAD temp[1].x, temp[1].zzzz, const[0].yyyy, -const[0].zzzz; 10: MUL temp[0].xyz, temp[0], input[0]; 11: MUL output[0].xyz, temp[0], temp[1].xxxx; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[2].xyz, input[1].xy__, 2D[0]; 1: MOV temp[0].xyz, temp[2].xyz_; 2: TEX temp[3].z, input[2].xyz_, CUBE[2]; 3: MOV temp[1].z, temp[3].__z_; 4: DP3 temp[1].x, input[2].xyz_, input[2].xyz_; 5: MOV output[0].w, const[0].___x; 6: TEX temp[4].x, temp[1].x___, 1D[3]; 7: MOV temp[1].x, temp[4].x___; 8: MUL temp[0].xyz, temp[1].xxx_, temp[0].xyz_; 9: MAD temp[1].x, temp[1].z___, const[0].y___, -const[0].z___; 10: MUL temp[0].xyz, temp[0].xyz_, input[0].xyz_; 11: MUL output[0].xyz, temp[0].xyz_, temp[1].xxx_; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[2].xyz, input[1].xy__, 2D[0]; 1: MOV temp[0].xyz, temp[2].xyz_; 2: TEX temp[3].z, input[2].xyz_, CUBE[2]; 3: MOV temp[1].z, temp[3].__z_; 4: DP3 temp[1].x, input[2].xyz_, input[2].xyz_; 5: MOV output[0].w, const[0].___x; 6: TEX temp[4].x, temp[1].x___, 1D[3]; 7: MOV temp[1].x, temp[4].x___; 8: MUL temp[0].xyz, temp[1].xxx_, temp[0].xyz_; 9: MAD temp[1].x, temp[1].z___, const[0].y___, -const[0].z___; 10: MUL temp[0].xyz, temp[0].xyz_, input[0].xyz_; 11: MUL output[0].xyz, temp[0].xyz_, temp[1].xxx_; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[5].xyz, input[1].xy__, 2D[0]; 1: MOV temp[6].xyz, temp[5].xyz_; 2: TEX temp[7].z, input[2].xyz_, CUBE[2]; 3: MOV temp[8].z, temp[7].__z_; 4: DP3 temp[9].x, input[2].xyz_, input[2].xyz_; 5: MOV output[0].w, const[0].___x; 6: TEX temp[10].x, temp[9].x___, 1D[3]; 7: MOV temp[11].x, temp[10].x___; 8: MUL temp[12].xyz, temp[11].xxx_, temp[6].xyz_; 9: MAD temp[13].x, temp[8].z___, const[0].y___, -const[0].z___; 10: MUL temp[14].xyz, temp[12].xyz_, input[0].xyz_; 11: MUL output[0].xyz, temp[14].xyz_, temp[13].xxx_; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[5].xyz, input[1].xy__, 2D[0]; 1: TEX temp[7].z, input[2].xyz_, CUBE[2]; 2: DP3 temp[9].x, input[2].xyz_, input[2].xyz_; 3: MOV output[0].w, none.___0; 4: TEX temp[10].x, temp[9].x___, 1D[3]; 5: MUL temp[12].xyz, temp[10].xxx_, temp[5].xyz_; 6: MAD temp[13].x, temp[7].z___, const[0].y___, -none.1___; 7: MUL temp[14].xyz, temp[12].xyz_, input[0].xyz_; 8: MUL output[0].xyz, temp[14].xyz_, temp[13].xxx_; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[5].xyz, input[1].xy__, 2D[0]; 1: TEX temp[7].z, input[2].xyz_, CUBE[2]; 2: DP3 temp[9].x, input[2].xyz_, input[2].xyz_; 3: MOV output[0].w, none.___0; 4: TEX temp[10].x, temp[9].x___, 1D[3]; 5: MUL temp[12].xyz, temp[10].xxx_, temp[5].xyz_; 6: MAD temp[13].x, temp[7].z___, const[0].y___, -none.1___; 7: MUL temp[14].xyz, temp[12].xyz_, input[0].xyz_; 8: MUL output[0].xyz, temp[14].xyz_, temp[13].xxx_; CONST[0] = { 0.0000 2.0000 1.0000 0.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[5].xyz, input[1].xy__, 2D[0]; 1: TEX temp[7].z, input[2].xyz_, CUBE[2]; 2: DP3 temp[9].x, input[2].xyz_, input[2].xyz_; 3: MOV output[0].w, none.___0; 4: TEX temp[10].x, temp[9].x___, 1D[3]; 5: MUL temp[12].xyz, temp[10].xxx_, temp[5].xyz_; 6: MAD temp[13].x, temp[7].z___, const[0].y___, -none.1___; 7: MUL temp[14].xyz, temp[12].xyz_, input[0].xyz_; 8: MUL output[0].xyz, temp[14].xyz_, temp[13].xxx_; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[5].xyz, input[1].xy__, 2D[0]; 1: TEX temp[7].z, input[2].xyz_, CUBE[2]; 2: src0.xyz = input[2] DP3 temp[9].x, src0.xyz, src0.xyz 3: MAD color[0].w, src0.0, src0.1, src0.0 4: TEX temp[10].x, temp[9].x___, 1D[3]; 5: src0.xyz = temp[10], src1.xyz = temp[5] MAD temp[12].xyz, src0.xxx, src1.xyz, src0.000 6: src0.xyz = temp[7], src1.xyz = const[0] MAD temp[13].x, src0.z__, src1.y__, -src0.1__ 7: src0.xyz = temp[12], src1.xyz = input[0] MAD temp[14].xyz, src0.xyz, src1.xyz, src0.000 8: src0.xyz = temp[14], src1.xyz = temp[13] MAD color[0].xyz, src0.xyz, src1.xxx, src0.000 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[2] DP3 temp[9].x, src0.xyz, src0.xyz 1: BEGIN_TEX; 2: TEX temp[5].xyz, input[1].xy__, 2D[0]; 3: TEX temp[7].z, input[2].xyz_, CUBE[2]; 4: TEX temp[10].x, temp[9].x___, 1D[3] SEM_WAIT SEM_ACQUIRE; 5: src0.xyz = temp[10], src1.xyz = temp[5] SEM_WAIT MAD temp[12].xyz, src0.xxx, src1.xyz, src0.000 6: src0.xyz = temp[7], src1.xyz = const[0] MAD temp[13].w, src0.z, src1.y, -src0.1 7: src0.xyz = temp[12], src1.xyz = input[0] MAD temp[14].xyz, src0.xyz, src1.xyz, src0.000 8: src0.xyz = temp[14], src0.w = temp[13], src1.xyz = temp[13] MAD color[0].xyz, src0.xyz, src0.www, src0.000 MAD color[0].w, src0.0, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[2] DP3 temp[9].x, src0.xyz, src0.xyz 1: BEGIN_TEX; 2: TEX temp[5].xyz, input[1].xy__, 2D[0]; 3: TEX temp[7].z, input[2].xyz_, CUBE[2]; 4: TEX temp[10].x, temp[9].x___, 1D[3] SEM_WAIT SEM_ACQUIRE; 5: src0.xyz = temp[10], src1.xyz = temp[5] SEM_WAIT MAD temp[12].xyz, src0.xxx, src1.xyz, src0.000 6: src0.xyz = temp[7], src1.xyz = const[0] MAD temp[13].w, src0.z, src1.y, -src0.1 7: src0.xyz = temp[12], src1.xyz = input[0] MAD temp[14].xyz, src0.xyz, src1.xyz, src0.000 8: src0.xyz = temp[14], src0.w = temp[13] MAD color[0].xyz, src0.xyz, src0.www, src0.000 MAD color[0].w, src0.0, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[2] DP3 temp[1].z, src0.xyz, src0.xyz 1: BEGIN_TEX; 2: TEX temp[3].xyz, input[1].xy__, 2D[0]; 3: TEX temp[2].z, input[2].xyz_, CUBE[2]; 4: TEX temp[1].x, temp[1].z___, 1D[3] SEM_WAIT SEM_ACQUIRE; 5: src0.xyz = temp[1], src1.xyz = temp[3] SEM_WAIT MAD temp[1].xyz, src0.xxx, src1.xyz, src0.000 6: src0.xyz = temp[2], src1.xyz = const[0] MAD temp[0].w, src0.z, src1.y, -src0.1 7: src0.xyz = temp[1], src1.xyz = input[0] MAD temp[0].xyz, src0.xyz, src1.xyz, src0.000 8: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.www, src0.000 MAD color[0].w, src0.0, src0.1, src0.0 pc=11************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00000000) 0: xyz: t2 t0 t0 bias-> t1.z (02040002) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz t2.xyz t2.xxx op: 00804000 w: t2.x t2.x t2.x op: 00000000 NODE 1: alu_offset: 1, tex_offset: 0, alu_end: 3, tex_end: 2 (code_addr: 004400c1) TEX: TEX t3, t1, texture[0] (000080c1) TEX t2, t2, texture[2] (00009082) TEX t1, t1, texture[3] (00009841) 1: xyz: t1 t3 t0 bias-> t1.xyz (038400c1) w: t0 t0 t0 bias-> (00000000) xyz: t1.xxx t3.xyz 0.0 op: 00050201 w: t1.x t1.x t1.x op: 00000000 2: xyz: t2 c0 t0 bias-> (00000802) w: t0 t0 t0 bias-> t0.w (00800000) xyz: t2.xxx t2.xxx t2.xxx op: 00004081 w: t2.z c0.y -1.0 op: 000c4202 3: xyz: t1 t0 t0 bias-> t0.xyz (03800001) w: t0 t0 t0 bias-> (00000000) xyz: t1.xyz t0.xyz 0.0 op: 00050200 w: t1.x t1.x t1.x op: 00000000 4: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz t0.www 0.0 op: 00050600 w: 0.0 1.0 0.0 op: 00040890 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0] 0: TEX TEMP[0], IN[1], SAMP[0], 2D 1: MUL OUT[0], TEMP[0], IN[0] 2: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1], 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].xy__, 2D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[1], input[1].xy__, 2D[0]; 1: MUL output[0], temp[1], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[1], input[1].xy__, 2D[0]; 1: MUL output[0], temp[1], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[1], input[1].xy__, 2D[0]; 1: MUL output[0], temp[1], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[1], input[1].xy__, 2D[0]; 1: MUL output[0], temp[1], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[1], input[1].xy__, 2D[0]; 1: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], input[1].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], input[1].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], input[1].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 pc=12************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t1, t1, texture[0] (00008041) 0: xyz: t1 t0 t0 bias-> o0.xyz (1c000001) w: t1 t0 t0 bias-> o0.w (01000001) xyz: t1.xyz t0.xyz 0.0 op: 00050200 w: t1.w t0.w 0.0 op: 00040509 r300: Initial vertex program VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL CONST[0..6] DCL TEMP[0] IMM FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].x, CONST[0] 1: ADD TEMP[0].y, -TEMP[0].xxxx, CONST[1].xxxx 2: DP4 TEMP[0].x, IN[0], CONST[2] 3: RCP TEMP[0].y, TEMP[0].yyyy 4: ADD TEMP[0].z, -TEMP[0].xxxx, CONST[1].xxxx 5: MOV OUT[1].xyz, CONST[3] 6: MUL OUT[2].x, TEMP[0].zzzz, TEMP[0].yyyy 7: DP4 OUT[0].w, IN[0], CONST[4] 8: MOV OUT[0].z, TEMP[0].xxxx 9: DP4 OUT[0].y, IN[0], CONST[5] 10: DP4 OUT[0].x, IN[0], CONST[6] 11: MOV OUT[1].w, IMM[0].xxxx 12: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1].xyz, const[3]; 6: MUL output[2].x, temp[0].zzzz, temp[0].yyyy; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].xxxx; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, temp[0].1111; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1].xyz, const[3]; 6: MUL output[2].x, temp[0].zzzz, temp[0].yyyy; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].xxxx; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, temp[0].1111; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1].xyz, const[3]; 6: MUL output[2].x, temp[0].zzzz, temp[0].yyyy; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].xxxx; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, temp[0].1111; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1].xyz, const[3]; 6: MUL output[2].x, temp[0].zzzz, temp[0].yyyy; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].xxxx; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, temp[0].1111; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1].xyz, const[3]; 6: MUL output[2].x, temp[0].zzzz, temp[0].yyyy; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].xxxx; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, temp[0].1111; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1].xyz, const[3]; 6: MUL output[2].x, temp[0].zzzz, temp[0].yyyy; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].xxxx; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, temp[0].1111; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0].x, const[0].x___; 1: ADD temp[0].y, -temp[0]._x__, const[1]._x__; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0]._y__; 4: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 5: MOV output[1].xyz, const[3].xyz_; 6: MUL output[2].x, temp[0].z___, temp[0].y___; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].__x_; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, temp[0].___1; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: ADD temp[0].y, -const[0]._x__, const[1]._x__; 1: DP4 temp[0].x, input[0], const[2]; 2: RCP temp[0].y, temp[0]._y__; 3: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 4: MOV output[1].xyz, const[3].xyz_; 5: MUL output[2].x, temp[0].z___, temp[0].y___; 6: DP4 temp[1].w, input[0], const[4]; 7: MOV temp[1].z, temp[0].__x_; 8: DP4 temp[1].y, input[0], const[5]; 9: DP4 temp[1].x, input[0], const[6]; 10: MOV output[1].w, none.___1; 11: MOV output[0], temp[1]; 12: MOV output[3], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV temp[2], const[1]._x__; 1: ADD temp[0].y, -const[0]._x__, temp[2]; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0]._y__; 4: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 5: MOV output[1].xyz, const[3].xyz_; 6: MUL output[2].x, temp[0].z___, temp[0].y___; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].__x_; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, none.___1; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV temp[0], const[1]._x__; 1: ADD temp[0].y, -const[0]._x__, temp[0]; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0]._y__; 4: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 5: MOV output[1].xyz, const[3].xyz_; 6: MUL output[2].x, temp[0].z___, temp[0].y___; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].__x_; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, none.___1; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV temp[0], const[1]._x__; 1: ADD temp[0].y, -const[0]._x__, temp[0]; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0]._y__; 4: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 5: MOV output[1].xyz, const[3].xyz_; 6: MUL output[2].x, temp[0].z___, temp[0].y___; 7: DP4 temp[1].w, input[0], const[4]; 8: MOV temp[1].z, temp[0].__x_; 9: DP4 temp[1].y, input[0], const[5]; 10: DP4 temp[1].x, input[0], const[6]; 11: MOV output[1].w, none.___1; 12: MOV output[0], temp[1]; 13: MOV output[3], temp[1]; Final vertex program code: 0: op: 0x00f00003 dst: 0t op: VE_ADD src0: 0x01f8e022 reg: 1c swiz: U/ X/ U/ U src1: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 1: op: 0x00200003 dst: 0t op: VE_ADD src0: 0x1ff8e002 reg: 0c swiz: -U/-X/-U/-U src1: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 2: op: 0x00100001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 3: op: 0x00200046 dst: 0t op: ME_RECIP_DX src0: 0x00492000 reg: 0t swiz: Y/ Y/ Y/ Y src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 4: op: 0x00400003 dst: 0t op: VE_ADD src0: 0x1fc7e000 reg: 0t swiz: -U/-U/-X/-U src1: 0x01c7e022 reg: 1c swiz: U/ U/ X/ U src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 5: op: 0x00702203 dst: 1o op: VE_ADD src0: 0x01d10062 reg: 3c swiz: X/ Y/ Z/ U src1: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 src2: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 6: op: 0x00104202 dst: 2o op: VE_MULTIPLY src0: 0x01ff4000 reg: 0t swiz: Z/ U/ U/ U src1: 0x01ff2000 reg: 0t swiz: Y/ U/ U/ U src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 7: op: 0x00802001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src2: 0x01248082 reg: 4c swiz: 0/ 0/ 0/ 0 8: op: 0x00402003 dst: 1t op: VE_ADD src0: 0x01c7e000 reg: 0t swiz: U/ U/ X/ U src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 9: op: 0x00202001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src2: 0x012480a2 reg: 5c swiz: 0/ 0/ 0/ 0 10: op: 0x00102001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src2: 0x012480c2 reg: 6c swiz: 0/ 0/ 0/ 0 11: op: 0x00802203 dst: 1o op: VE_ADD src0: 0x017fe000 reg: 0t swiz: U/ U/ U/ 1 src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 12: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 13: op: 0x00f06203 dst: 3o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0] 0: TEX TEMP[0], IN[1], SAMP[0], 1D 1: MUL OUT[0], TEMP[0], IN[0] 2: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[1], 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[1], 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[1], 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[1], 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[1], 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[1], 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[1], 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[1].x___, 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[1].x___, 1D[0]; 1: MUL output[0], temp[0], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[1], input[1].x___, 1D[0]; 1: MUL output[0], temp[1], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[1], input[1].x___, 1D[0]; 1: MUL output[0], temp[1], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[1], input[1].x___, 1D[0]; 1: MUL output[0], temp[1], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[1], input[1].x___, 1D[0]; 1: MUL output[0], temp[1], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[1], input[1].x___, 1D[0]; 1: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], input[1].x___, 1D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], input[1].x___, 1D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1], input[1].x___, 1D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 pc=13************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t1, t1, texture[0] (00008041) 0: xyz: t1 t0 t0 bias-> o0.xyz (1c000001) w: t1 t0 t0 bias-> o0.w (01000001) xyz: t1.xyz t0.xyz 0.0 op: 00050200 w: t1.w t0.w 0.0 op: 00040509 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..5] DCL TEMP[0] 0: MOV TEMP[0].x, CONST[0] 1: ADD TEMP[0].y, -TEMP[0].xxxx, CONST[1].xxxx 2: DP4 TEMP[0].x, IN[0], CONST[2] 3: RCP TEMP[0].y, TEMP[0].yyyy 4: ADD TEMP[0].z, -TEMP[0].xxxx, CONST[1].xxxx 5: MOV OUT[1], IN[1] 6: MOV OUT[2].xyz, IN[2] 7: MUL OUT[3].x, TEMP[0].zzzz, TEMP[0].yyyy 8: DP4 OUT[0].w, IN[0], CONST[3] 9: MOV OUT[0].z, TEMP[0].xxxx 10: DP4 OUT[0].y, IN[0], CONST[4] 11: DP4 OUT[0].x, IN[0], CONST[5] 12: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2]; 7: MUL output[3].x, temp[0].zzzz, temp[0].yyyy; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].xxxx; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2]; 7: MUL output[3].x, temp[0].zzzz, temp[0].yyyy; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].xxxx; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2]; 7: MUL output[3].x, temp[0].zzzz, temp[0].yyyy; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].xxxx; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2]; 7: MUL output[3].x, temp[0].zzzz, temp[0].yyyy; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].xxxx; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2]; 7: MUL output[3].x, temp[0].zzzz, temp[0].yyyy; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].xxxx; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0].x, const[0]; 1: ADD temp[0].y, -temp[0].xxxx, const[1].xxxx; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0].yyyy; 4: ADD temp[0].z, -temp[0].xxxx, const[1].xxxx; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2]; 7: MUL output[3].x, temp[0].zzzz, temp[0].yyyy; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].xxxx; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0].x, const[0].x___; 1: ADD temp[0].y, -temp[0]._x__, const[1]._x__; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0]._y__; 4: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2].xyz_; 7: MUL output[3].x, temp[0].z___, temp[0].y___; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].__x_; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: ADD temp[0].y, -const[0]._x__, const[1]._x__; 1: DP4 temp[0].x, input[0], const[2]; 2: RCP temp[0].y, temp[0]._y__; 3: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 4: MOV output[1], input[1]; 5: MOV output[2].xyz, input[2].xyz_; 6: MUL output[3].x, temp[0].z___, temp[0].y___; 7: DP4 temp[1].w, input[0], const[3]; 8: MOV temp[1].z, temp[0].__x_; 9: DP4 temp[1].y, input[0], const[4]; 10: DP4 temp[1].x, input[0], const[5]; 11: MOV output[0], temp[1]; 12: MOV output[4], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV temp[2], const[1]._x__; 1: ADD temp[0].y, -const[0]._x__, temp[2]; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0]._y__; 4: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2].xyz_; 7: MUL output[3].x, temp[0].z___, temp[0].y___; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].__x_; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV temp[0], const[1]._x__; 1: ADD temp[0].y, -const[0]._x__, temp[0]; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0]._y__; 4: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2].xyz_; 7: MUL output[3].x, temp[0].z___, temp[0].y___; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].__x_; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV temp[0], const[1]._x__; 1: ADD temp[0].y, -const[0]._x__, temp[0]; 2: DP4 temp[0].x, input[0], const[2]; 3: RCP temp[0].y, temp[0]._y__; 4: ADD temp[0].z, -temp[0].__x_, const[1].__x_; 5: MOV output[1], input[1]; 6: MOV output[2].xyz, input[2].xyz_; 7: MUL output[3].x, temp[0].z___, temp[0].y___; 8: DP4 temp[1].w, input[0], const[3]; 9: MOV temp[1].z, temp[0].__x_; 10: DP4 temp[1].y, input[0], const[4]; 11: DP4 temp[1].x, input[0], const[5]; 12: MOV output[0], temp[1]; 13: MOV output[4], temp[1]; Final vertex program code: 0: op: 0x00f00003 dst: 0t op: VE_ADD src0: 0x01f8e022 reg: 1c swiz: U/ X/ U/ U src1: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 1: op: 0x00200003 dst: 0t op: VE_ADD src0: 0x1ff8e002 reg: 0c swiz: -U/-X/-U/-U src1: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 2: op: 0x00100001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 3: op: 0x00200046 dst: 0t op: ME_RECIP_DX src0: 0x00492000 reg: 0t swiz: Y/ Y/ Y/ Y src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 4: op: 0x00400003 dst: 0t op: VE_ADD src0: 0x1fc7e000 reg: 0t swiz: -U/-U/-X/-U src1: 0x01c7e022 reg: 1c swiz: U/ U/ X/ U src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 5: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 6: op: 0x00704203 dst: 2o op: VE_ADD src0: 0x01d10041 reg: 2i swiz: X/ Y/ Z/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 7: op: 0x00106202 dst: 3o op: VE_MULTIPLY src0: 0x01ff4000 reg: 0t swiz: Z/ U/ U/ U src1: 0x01ff2000 reg: 0t swiz: Y/ U/ U/ U src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 8: op: 0x00802001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 9: op: 0x00402003 dst: 1t op: VE_ADD src0: 0x01c7e000 reg: 0t swiz: U/ U/ X/ U src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 10: op: 0x00202001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src2: 0x01248082 reg: 4c swiz: 0/ 0/ 0/ 0 11: op: 0x00102001 dst: 1t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src2: 0x012480a2 reg: 5c swiz: 0/ 0/ 0/ 0 12: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 13: op: 0x00f08203 dst: 4o op: VE_ADD src0: 0x00d10020 reg: 1t swiz: X/ Y/ Z/ W src1: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 src2: 0x01248020 reg: 1t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL IN[1], GENERIC[0], PERSPECTIVE DCL IN[2], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..1] 0: TEX TEMP[0], IN[2], SAMP[1], 1D 1: TEX TEMP[1], IN[1], SAMP[0], 2D 2: MUL TEMP[1], TEMP[1], TEMP[0] 3: MUL OUT[0], TEMP[1], IN[0] 4: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[2], 1D[1]; 1: TEX temp[1], input[1], 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[2], 1D[1]; 1: TEX temp[1], input[1], 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[2], 1D[1]; 1: TEX temp[1], input[1], 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[2], 1D[1]; 1: TEX temp[1], input[1], 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[2], 1D[1]; 1: TEX temp[1], input[1], 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[2], 1D[1]; 1: TEX temp[1], input[1], 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[2], 1D[1]; 1: TEX temp[1], input[1], 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[2].x___, 1D[1]; 1: TEX temp[1], input[1].xy__, 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[2].x___, 1D[1]; 1: TEX temp[1], input[1].xy__, 2D[0]; 2: MUL temp[1], temp[1], temp[0]; 3: MUL output[0], temp[1], input[0]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[2], input[2].x___, 1D[1]; 1: TEX temp[3], input[1].xy__, 2D[0]; 2: MUL temp[4], temp[3], temp[2]; 3: MUL output[0], temp[4], input[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[2], input[2].x___, 1D[1]; 1: TEX temp[3], input[1].xy__, 2D[0]; 2: MUL temp[4], temp[3], temp[2]; 3: MUL output[0], temp[4], input[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[2], input[2].x___, 1D[1]; 1: TEX temp[3], input[1].xy__, 2D[0]; 2: MUL temp[4], temp[3], temp[2]; 3: MUL output[0], temp[4], input[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[2], input[2].x___, 1D[1]; 1: TEX temp[3], input[1].xy__, 2D[0]; 2: MUL temp[4], temp[3], temp[2]; 3: MUL output[0], temp[4], input[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[2], input[2].x___, 1D[1]; 1: TEX temp[3], input[1].xy__, 2D[0]; 2: src0.xyz = temp[3], src0.w = temp[3], src1.xyz = temp[2], src1.w = temp[2] MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[4].w, src0.w, src1.w, src0.0 3: src0.xyz = temp[4], src0.w = temp[4], src1.xyz = input[0], src1.w = input[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[2], input[2].x___, 1D[1]; 2: TEX temp[3], input[1].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 3: src0.xyz = temp[3], src0.w = temp[3], src1.xyz = temp[2], src1.w = temp[2] SEM_WAIT MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[4].w, src0.w, src1.w, src0.0 4: src0.xyz = temp[4], src0.w = temp[4], src1.xyz = input[0], src1.w = input[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[2], input[2].x___, 1D[1]; 2: TEX temp[3], input[1].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 3: src0.xyz = temp[3], src0.w = temp[3], src1.xyz = temp[2], src1.w = temp[2] SEM_WAIT MAD temp[4].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[4].w, src0.w, src1.w, src0.0 4: src0.xyz = temp[4], src0.w = temp[4], src1.xyz = input[0], src1.w = input[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[2], input[2].x___, 1D[1]; 2: TEX temp[1], input[1].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 3: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = temp[2], src1.w = temp[2] SEM_WAIT MAD temp[1].xyz, src0.xyz, src1.xyz, src0.000 MAD temp[1].w, src0.w, src1.w, src0.0 4: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = input[0], src1.w = input[0] MAD color[0].xyz, src0.xyz, src1.xyz, src0.000 MAD color[0].w, src0.w, src1.w, src0.0 pc=14************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 1, tex_end: 1 (code_addr: 00420040) TEX: TEX t2, t2, texture[1] (00008882) TEX t1, t1, texture[0] (00008041) 0: xyz: t1 t2 t0 bias-> t1.xyz (03840081) w: t1 t2 t0 bias-> t1.w (00840081) xyz: t1.xyz t2.xyz 0.0 op: 00050200 w: t1.w t2.w 0.0 op: 00040509 1: xyz: t1 t0 t0 bias-> o0.xyz (1c000001) w: t1 t0 t0 bias-> o0.w (01000001) xyz: t1.xyz t0.xyz 0.0 op: 00050200 w: t1.w t0.w 0.0 op: 00040509 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..1] IMM FLT32 { 0.5000, 1.0000, 0.0000, 0.0000} 0: TEX TEMP[0].w, IN[1], SAMP[1], 1D 1: TEX TEMP[1], IN[0], SAMP[0], 2D 2: ADD TEMP[0].x, -TEMP[0].wwww, IMM[0].yyyy 3: MUL TEMP[0].x, TEMP[0], IMM[0].xyzy 4: MAD OUT[0].xyz, TEMP[1], TEMP[0].wwww, TEMP[0].xxxx 5: MOV OUT[0].w, TEMP[1] 6: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0].w, input[1], 1D[1]; 1: TEX temp[1], input[0], 2D[0]; 2: ADD temp[0].x, -temp[0].wwww, temp[0].1111; 3: MUL temp[0].x, temp[0], temp[0].H101; 4: MAD output[0].xyz, temp[1], temp[0].wwww, temp[0].xxxx; 5: MOV output[0].w, temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0].w, input[1], 1D[1]; 1: TEX temp[1], input[0], 2D[0]; 2: ADD temp[0].x, -temp[0].wwww, temp[0].1111; 3: MUL temp[0].x, temp[0], temp[0].H101; 4: MAD output[0].xyz, temp[1], temp[0].wwww, temp[0].xxxx; 5: MOV output[0].w, temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0].w, input[1], 1D[1]; 1: TEX temp[1], input[0], 2D[0]; 2: ADD temp[0].x, -temp[0].wwww, temp[0].1111; 3: MUL temp[0].x, temp[0], temp[0].H101; 4: MAD output[0].xyz, temp[1], temp[0].wwww, temp[0].xxxx; 5: MOV output[0].w, temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0].w, input[1], 1D[1]; 1: TEX temp[1], input[0], 2D[0]; 2: ADD temp[0].x, -temp[0].wwww, temp[0].1111; 3: MUL temp[0].x, temp[0], temp[0].H101; 4: MAD output[0].xyz, temp[1], temp[0].wwww, temp[0].xxxx; 5: MOV output[0].w, temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0].w, input[1], 1D[1]; 1: TEX temp[1], input[0], 2D[0]; 2: ADD temp[0].x, -temp[0].wwww, temp[0].1111; 3: MUL temp[0].x, temp[0], temp[0].H101; 4: MAD output[0].xyz, temp[1], temp[0].wwww, temp[0].xxxx; 5: MOV output[0].w, temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[2], input[1], 1D[1]; 1: MOV temp[0].w, temp[2]; 2: TEX temp[1], input[0], 2D[0]; 3: ADD temp[0].x, -temp[0].wwww, temp[0].1111; 4: MUL temp[0].x, temp[0], temp[0].H101; 5: MAD output[0].xyz, temp[1], temp[0].wwww, temp[0].xxxx; 6: MOV output[0].w, temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[2], input[1], 1D[1]; 1: MOV temp[0].w, temp[2]; 2: TEX temp[1], input[0], 2D[0]; 3: ADD temp[0].x, -temp[0].wwww, temp[0].1111; 4: MUL temp[0].x, temp[0], temp[0].H101; 5: MAD output[0].xyz, temp[1], temp[0].wwww, temp[0].xxxx; 6: MOV output[0].w, temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[2].w, input[1].x___, 1D[1]; 1: MOV temp[0].w, temp[2].___w; 2: TEX temp[1], input[0].xy__, 2D[0]; 3: ADD temp[0].x, -temp[0].w___, temp[0].1___; 4: MUL temp[0].x, temp[0].x___, temp[0].H___; 5: MAD output[0].xyz, temp[1].xyz_, temp[0].www_, temp[0].xxx_; 6: MOV output[0].w, temp[1].___w; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[2].w, input[1].x___, 1D[1]; 1: MOV temp[0].w, temp[2].___w; 2: TEX temp[1], input[0].xy__, 2D[0]; 3: ADD temp[0].x, -temp[0].w___, temp[0].1___; 4: MUL temp[0].x, temp[0].x___, temp[0].H___; 5: MAD output[0].xyz, temp[1].xyz_, temp[0].www_, temp[0].xxx_; 6: MOV output[0].w, temp[1].___w; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[3].w, input[1].x___, 1D[1]; 1: MOV temp[4].w, temp[3].___w; 2: TEX temp[5], input[0].xy__, 2D[0]; 3: ADD temp[6].x, -temp[4].w___, temp[0].1___; 4: MUL temp[7].x, temp[6].x___, temp[0].H___; 5: MAD output[0].xyz, temp[5].xyz_, temp[4].www_, temp[7].xxx_; 6: MOV output[0].w, temp[5].___w; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[3].w, input[1].x___, 1D[1]; 1: TEX temp[5], input[0].xy__, 2D[0]; 2: ADD temp[6].x, -temp[3].w___, none.1___; 3: MUL temp[7].x, temp[6].x___, none.H___; 4: MAD output[0].xyz, temp[5].xyz_, temp[3].www_, temp[7].xxx_; 5: MOV output[0].w, temp[5].___w; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[3].w, input[1].x___, 1D[1]; 1: TEX temp[5], input[0].xy__, 2D[0]; 2: ADD temp[6].x, -temp[3].w___, none.1___; 3: MUL temp[7].x, temp[6].x___, none.H___; 4: MAD output[0].xyz, temp[5].xyz_, temp[3].www_, temp[7].xxx_; 5: MOV output[0].w, temp[5].___w; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[3].w, input[1].x___, 1D[1]; 1: TEX temp[5], input[0].xy__, 2D[0]; 2: ADD temp[6].x, -temp[3].w___, none.1___; 3: MUL temp[7].x, temp[6].x___, none.H___; 4: MAD output[0].xyz, temp[5].xyz_, temp[3].www_, temp[7].xxx_; 5: MOV output[0].w, temp[5].___w; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[3].w, input[1].x___, 1D[1]; 1: TEX temp[5], input[0].xy__, 2D[0]; 2: src0.w = temp[3] MAD temp[6].x, -src0.w__, src0.111, src0.1__ 3: src0.xyz = temp[6] MAD temp[7].x, src0.x__, src0.H__, src0.000 4: src0.xyz = temp[5], src0.w = temp[3], src1.xyz = temp[7] MAD color[0].xyz, src0.xyz, src0.www, src1.xxx 5: src0.w = temp[5] MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[3].w, input[1].x___, 1D[1]; 2: TEX temp[5], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 3: src0.w = temp[3] SEM_WAIT MAD temp[6].x, -src0.w__, src0.111, src0.1__ 4: src0.xyz = temp[6] MAD temp[7].x, src0.x__, src0.H__, src0.000 5: src0.xyz = temp[5], src0.w = temp[3], src1.xyz = temp[7], src1.w = temp[5] MAD color[0].xyz, src0.xyz, src0.www, src1.xxx MAD color[0].w, src1.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[3].w, input[1].x___, 1D[1]; 2: TEX temp[5], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 3: src0.w = temp[3] SEM_WAIT MAD temp[6].x, -src0.w__, src0.111, src0.1__ 4: src0.xyz = temp[6] MAD temp[7].x, src0.x__, src0.H__, src0.000 5: src0.xyz = temp[5], src0.w = temp[3], src1.xyz = temp[7], src1.w = temp[5] MAD color[0].xyz, src0.xyz, src0.www, src1.xxx MAD color[0].w, src1.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[1].w, input[1].x___, 1D[1]; 2: TEX temp[0], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 3: src0.w = temp[1] SEM_WAIT MAD temp[2].x, -src0.w__, src0.1__, src0.1__ 4: src0.xyz = temp[2] MAD temp[2].x, src0.x__, src0.H__, src0.0__ 5: src0.xyz = temp[0], src0.w = temp[1], src1.xyz = temp[2], src1.w = temp[0] MAD color[0].xyz, src0.xyz, src0.www, src1.xxx MAD color[0].w, src1.w, src0.1, src0.0 pc=15************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 2, tex_end: 1 (code_addr: 00420080) TEX: TEX t1, t1, texture[1] (00008841) TEX t0, t0, texture[0] (00008000) 0: xyz: t0 t0 t0 bias-> t2.x (00880000) w: t1 t0 t0 bias-> (00000001) xyz: -t1.www 1.0 1.0 op: 00054aac w: t0.x t0.x t0.x op: 00000000 1: xyz: t2 t0 t0 bias-> t2.x (00880002) w: t0 t0 t0 bias-> (00000000) xyz: t2.xyz 0.5 0.0 op: 00050b00 w: t2.x t2.x t2.x op: 00000000 2: xyz: t0 t2 t0 bias-> o0.xyz (1c000080) w: t1 t0 t0 bias-> o0.w (01000001) xyz: t0.xyz t1.www t2.xxx op: 00014600 w: t0.w 1.0 0.0 op: 0004088a r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0], input[0]; 1: MOV output[1], input[1]; 2: MOV output[0], temp[0]; 3: MOV output[2], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1], input[1]; 1: MOV output[0], input[0]; 2: MOV output[2], input[0]; Final vertex program code: 0: op: 0x00f02203 dst: 1o op: VE_ADD src0: 0x00d10021 reg: 1i swiz: X/ Y/ Z/ W src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 2: op: 0x00f04203 dst: 2o op: VE_ADD src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 src2: 0x01248001 reg: 0i swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX output[0], input[0], 2D[0]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[1], input[0], 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[1], input[0], 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[1], input[0].xy__, 2D[0]; 1: MOV output[0], temp[1]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: MOV output[0], temp[0]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[0], input[0].xy__, 2D[0]; 1: src0.xyz = temp[0], src0.w = temp[0] MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[0], src0.w = temp[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[0], src0.w = temp[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: BEGIN_TEX; 1: TEX temp[0], input[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 2: src0.xyz = temp[0], src0.w = temp[0] SEM_WAIT MAD color[0].xyz, src0.xyz, src0.111, src0.000 MAD color[0].w, src0.w, src0.1, src0.0 pc=16************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) TEX: TEX t0, t0, texture[0] (00008000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: t0.xyz 1.0 0.0 op: 00050a80 w: t0.w 1.0 0.0 op: 00040889 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL OUT[4], GENERIC[2] DCL OUT[5], GENERIC[3] DCL CONST[0..2] DCL CONST[4..7] DCL TEMP[0] IMM FLT32 { 2.0000, 0.0000, 1.0000, 0.0000} 0: MOV TEMP[0].y, CONST[0].xxxx 1: MOV TEMP[0].x, CONST[1] 2: MUL TEMP[0].zw, TEMP[0].xyxy, CONST[2].xxxx 3: MOV OUT[1].xyz, IN[1] 4: MAD OUT[2].xy, TEMP[0], CONST[2].xxxx, IN[2] 5: MAD OUT[3].xy, TEMP[0].zwzw, IMM[0].xxxx, IN[2] 6: MAD OUT[4].xy, -TEMP[0], CONST[2].xxxx, IN[2] 7: MAD OUT[5].xy, -TEMP[0].zwzw, IMM[0].xxxx, IN[2] 8: DP4 OUT[0].w, IN[0], CONST[4] 9: DP4 OUT[0].z, IN[0], CONST[5] 10: DP4 OUT[0].y, IN[0], CONST[6] 11: DP4 OUT[0].x, IN[0], CONST[7] 12: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV temp[0].y, const[0].xxxx; 1: MOV temp[0].x, const[1]; 2: MUL temp[0].zw, temp[0].xyxy, const[2].xxxx; 3: MOV output[1].xyz, input[1]; 4: MAD output[2].xy, temp[0], const[2].xxxx, input[2]; 5: MAD output[3].xy, temp[0].zwzw, const[8].xxxx, input[2]; 6: MAD output[4].xy, -temp[0], const[2].xxxx, input[2]; 7: MAD output[5].xy, -temp[0].zwzw, const[8].xxxx, input[2]; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV temp[0].y, const[0].xxxx; 1: MOV temp[0].x, const[1]; 2: MUL temp[0].zw, temp[0].xyxy, const[2].xxxx; 3: MOV output[1].xyz, input[1]; 4: MAD output[2].xy, temp[0], const[2].xxxx, input[2]; 5: MAD output[3].xy, temp[0].zwzw, const[8].xxxx, input[2]; 6: MAD output[4].xy, -temp[0], const[2].xxxx, input[2]; 7: MAD output[5].xy, -temp[0].zwzw, const[8].xxxx, input[2]; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV temp[0].y, const[0].xxxx; 1: MOV temp[0].x, const[1]; 2: MUL temp[0].zw, temp[0].xyxy, const[2].xxxx; 3: MOV output[1].xyz, input[1]; 4: MAD output[2].xy, temp[0], const[2].xxxx, input[2]; 5: MAD output[3].xy, temp[0].zwzw, const[8].xxxx, input[2]; 6: MAD output[4].xy, -temp[0], const[2].xxxx, input[2]; 7: MAD output[5].xy, -temp[0].zwzw, const[8].xxxx, input[2]; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV temp[0].y, const[0].xxxx; 1: MOV temp[0].x, const[1]; 2: MUL temp[0].zw, temp[0].xyxy, const[2].xxxx; 3: MOV output[1].xyz, input[1]; 4: MAD output[2].xy, temp[0], const[2].xxxx, input[2]; 5: MAD output[3].xy, temp[0].zwzw, const[8].xxxx, input[2]; 6: MAD output[4].xy, -temp[0], const[2].xxxx, input[2]; 7: MAD output[5].xy, -temp[0].zwzw, const[8].xxxx, input[2]; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV temp[0].y, const[0].xxxx; 1: MOV temp[0].x, const[1]; 2: MUL temp[0].zw, temp[0].xyxy, const[2].xxxx; 3: MOV output[1].xyz, input[1]; 4: MAD output[2].xy, temp[0], const[2].xxxx, input[2]; 5: MAD output[3].xy, temp[0].zwzw, const[8].xxxx, input[2]; 6: MAD output[4].xy, -temp[0], const[2].xxxx, input[2]; 7: MAD output[5].xy, -temp[0].zwzw, const[8].xxxx, input[2]; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV temp[0].y, const[0].xxxx; 1: MOV temp[0].x, const[1]; 2: MUL temp[0].zw, temp[0].xyxy, const[2].xxxx; 3: MOV output[1].xyz, input[1]; 4: MAD output[2].xy, temp[0], const[2].xxxx, input[2]; 5: MAD output[3].xy, temp[0].zwzw, const[8].xxxx, input[2]; 6: MAD output[4].xy, -temp[0], const[2].xxxx, input[2]; 7: MAD output[5].xy, -temp[0].zwzw, const[8].xxxx, input[2]; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV temp[0].y, const[0]._x__; 1: MOV temp[0].x, const[1].x___; 2: MUL temp[0].zw, temp[0].__xy, const[2].__xx; 3: MOV output[1].xyz, input[1].xyz_; 4: MAD output[2].xy, temp[0].xy__, const[2].xx__, input[2].xy__; 5: MAD output[3].xy, temp[0].zw__, const[8].xx__, input[2].xy__; 6: MAD output[4].xy, -temp[0].xy__, const[2].xx__, input[2].xy__; 7: MAD output[5].xy, -temp[0].zw__, const[8].xx__, input[2].xy__; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV temp[0].y, const[0]._x__; 1: MOV temp[0].x, const[1].x___; 2: MUL temp[0].zw, temp[0].__xy, const[2].__xx; 3: MOV output[1].xyz, input[1].xyz_; 4: MAD output[2].xy, temp[0].xy__, const[2].xx__, input[2].xy__; 5: MAD output[3].xy, temp[0].zw__, const[8].xx__, input[2].xy__; 6: MAD output[4].xy, -temp[0].xy__, const[2].xx__, input[2].xy__; 7: MAD output[5].xy, -temp[0].zw__, const[8].xx__, input[2].xy__; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV temp[0].y, const[0]._x__; 1: MOV temp[0].x, const[1].x___; 2: MUL temp[0].zw, temp[0].__xy, const[2].__xx; 3: MOV output[1].xyz, input[1].xyz_; 4: MAD output[2].xy, temp[0].xy__, const[2].xx__, input[2].xy__; 5: MAD output[3].xy, temp[0].zw__, const[8].xx__, input[2].xy__; 6: MAD output[4].xy, -temp[0].xy__, const[2].xx__, input[2].xy__; 7: MAD output[5].xy, -temp[0].zw__, const[8].xx__, input[2].xy__; 8: DP4 temp[1].w, input[0], const[4]; 9: DP4 temp[1].z, input[0], const[5]; 10: DP4 temp[1].y, input[0], const[6]; 11: DP4 temp[1].x, input[0], const[7]; 12: MOV output[0], temp[1]; 13: MOV output[6], temp[1]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV temp[0].y, const[0]._x__; 1: MOV temp[0].x, const[1].x___; 2: MUL temp[0].zw, temp[0].__xy, const[2].__xx; 3: MOV output[1].xyz, input[1].xyz_; 4: MAD output[2].xy, temp[0].xy__, const[2].xx__, input[2].xy__; 5: MAD output[3].xy, temp[0].zw__, const[8].xx__, input[2].xy__; 6: MAD output[4].xy, -temp[0].xy__, const[2].xx__, input[2].xy__; 7: MAD output[5].xy, -temp[0].zw__, const[8].xx__, input[2].xy__; 8: DP4 temp[0].w, input[0], const[4]; 9: DP4 temp[0].z, input[0], const[5]; 10: DP4 temp[0].y, input[0], const[6]; 11: DP4 temp[0].x, input[0], const[7]; 12: MOV output[0], temp[0]; 13: MOV output[6], temp[0]; CONST[8] = { 2.0000 0.0000 1.0000 0.0000 } Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV temp[0].y, const[0]._x__; 1: MOV temp[0].x, const[1].x___; 2: MUL temp[0].zw, temp[0].__xy, const[2].__xx; 3: MOV output[1].xyz, input[1].xyz_; 4: MAD output[2].xy, temp[0].xy__, const[2].xx__, input[2].xy__; 5: MAD output[3].xy, temp[0].zw__, const[8].xx__, input[2].xy__; 6: MAD output[4].xy, -temp[0].xy__, const[2].xx__, input[2].xy__; 7: MAD output[5].xy, -temp[0].zw__, const[8].xx__, input[2].xy__; 8: DP4 temp[0].w, input[0], const[4]; 9: DP4 temp[0].z, input[0], const[5]; 10: DP4 temp[0].y, input[0], const[6]; 11: DP4 temp[0].x, input[0], const[7]; 12: MOV output[0], temp[0]; 13: MOV output[6], temp[0]; Final vertex program code: 0: op: 0x00200003 dst: 0t op: VE_ADD src0: 0x01f8e002 reg: 0c swiz: U/ X/ U/ U src1: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 1: op: 0x00100003 dst: 0t op: VE_ADD src0: 0x01ff0022 reg: 1c swiz: X/ U/ U/ U src1: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 2: op: 0x00c00002 dst: 0t op: VE_MULTIPLY src0: 0x0047e000 reg: 0t swiz: U/ U/ X/ Y src1: 0x0007e042 reg: 2c swiz: U/ U/ X/ X src2: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 3: op: 0x00702203 dst: 1o op: VE_ADD src0: 0x01d10021 reg: 1i swiz: X/ Y/ Z/ U src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 4: op: 0x00304204 dst: 2o op: VE_MULTIPLY_ADD src0: 0x01f90000 reg: 0t swiz: X/ Y/ U/ U src1: 0x01f80042 reg: 2c swiz: X/ X/ U/ U src2: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U 5: op: 0x00306204 dst: 3o op: VE_MULTIPLY_ADD src0: 0x01fb4000 reg: 0t swiz: Z/ W/ U/ U src1: 0x01f80102 reg: 8c swiz: X/ X/ U/ U src2: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U 6: op: 0x00308204 dst: 4o op: VE_MULTIPLY_ADD src0: 0x1ff90000 reg: 0t swiz: -X/-Y/-U/-U src1: 0x01f80042 reg: 2c swiz: X/ X/ U/ U src2: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U 7: op: 0x0030a204 dst: 5o op: VE_MULTIPLY_ADD src0: 0x1ffb4000 reg: 0t swiz: -Z/-W/-U/-U src1: 0x01f80102 reg: 8c swiz: X/ X/ U/ U src2: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U 8: op: 0x00800001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10082 reg: 4c swiz: X/ Y/ Z/ W src2: 0x01248082 reg: 4c swiz: 0/ 0/ 0/ 0 9: op: 0x00400001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100a2 reg: 5c swiz: X/ Y/ Z/ W src2: 0x012480a2 reg: 5c swiz: 0/ 0/ 0/ 0 10: op: 0x00200001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100c2 reg: 6c swiz: X/ Y/ Z/ W src2: 0x012480c2 reg: 6c swiz: 0/ 0/ 0/ 0 11: op: 0x00100001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d100e2 reg: 7c swiz: X/ Y/ Z/ W src2: 0x012480e2 reg: 7c swiz: 0/ 0/ 0/ 0 12: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 13: op: 0x00f0c203 dst: 6o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..6] IMM FLT32 { 0.5000, 0.1500, 0.0750, 0.3500} IMM FLT32 { 0.1000, 0.0000, 1.0000, 0.0000} 0: TEX TEMP[0], IN[3], SAMP[0], RECT 1: TEX TEMP[1], IN[2], SAMP[0], RECT 2: MOV TEMP[2].xy, IN[2] 3: ADD TEMP[2].xy, IN[0], TEMP[2] 4: MUL TEMP[2].xy, TEMP[2], IMM[0].xxxx 5: MOV TEMP[3].xy, IN[1] 6: MOV TEMP[2].zw, IN[3].xyxy 7: ADD TEMP[3].xy, IN[0], TEMP[3] 8: MUL TEMP[3].xy, TEMP[3], IMM[0].xxxx 9: ADD TEMP[2].zw, IN[2].xyxy, TEMP[2] 10: MUL TEMP[2].zw, TEMP[2], IMM[0].xxxx 11: TEX TEMP[4], TEMP[3], SAMP[0], RECT 12: TEX TEMP[3], IN[1], SAMP[0], RECT 13: TEX TEMP[5], TEMP[2].zwzw, SAMP[0], RECT 14: TEX TEMP[6], TEMP[2], SAMP[0], RECT 15: TEX TEMP[2], IN[0], SAMP[0], RECT 16: MUL TEMP[3], TEMP[3], IMM[0].zzzz 17: MAD TEMP[2], TEMP[2], IMM[0].yyyy, TEMP[3] 18: MAD TEMP[2], TEMP[1], IMM[0].yyyy, TEMP[2] 19: MAD TEMP[2], TEMP[0], IMM[0].zzzz, TEMP[2] 20: MAD TEMP[2], TEMP[6], IMM[0].wwww, TEMP[2] 21: MAD TEMP[2], TEMP[5], IMM[1].xxxx, TEMP[2] 22: MAD OUT[0], TEMP[4], IMM[1].xxxx, TEMP[2] 23: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[3], RECT[0]; 1: TEX temp[1], input[2], RECT[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], RECT[0]; 12: TEX temp[3], input[1], RECT[0]; 13: TEX temp[5], temp[2].zwzw, RECT[0]; 14: TEX temp[6], temp[2], RECT[0]; 15: TEX temp[2], input[0], RECT[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[3], RECT[0]; 1: TEX temp[1], input[2], RECT[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], RECT[0]; 12: TEX temp[3], input[1], RECT[0]; 13: TEX temp[5], temp[2].zwzw, RECT[0]; 14: TEX temp[6], temp[2], RECT[0]; 15: TEX temp[2], input[0], RECT[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[3], RECT[0]; 1: TEX temp[1], input[2], RECT[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], RECT[0]; 12: TEX temp[3], input[1], RECT[0]; 13: TEX temp[5], temp[2].zwzw, RECT[0]; 14: TEX temp[6], temp[2], RECT[0]; 15: TEX temp[2], input[0], RECT[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[3], RECT[0]; 1: TEX temp[1], input[2], RECT[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], RECT[0]; 12: TEX temp[3], input[1], RECT[0]; 13: TEX temp[5], temp[2].zwzw, RECT[0]; 14: TEX temp[6], temp[2], RECT[0]; 15: TEX temp[2], input[0], RECT[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[3], RECT[0]; 1: TEX temp[1], input[2], RECT[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], RECT[0]; 12: TEX temp[3], input[1], RECT[0]; 13: TEX temp[5], temp[2].zwzw, RECT[0]; 14: TEX temp[6], temp[2], RECT[0]; 15: TEX temp[2], input[0], RECT[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MUL temp[7], input[3], const[2]; 1: TEX temp[0], temp[7], 2D[0]; 2: MUL temp[8], input[2], const[2]; 3: TEX temp[1], temp[8], 2D[0]; 4: MOV temp[2].xy, input[2]; 5: ADD temp[2].xy, input[0], temp[2]; 6: MUL temp[2].xy, temp[2], const[0].xxxx; 7: MOV temp[3].xy, input[1]; 8: MOV temp[2].zw, input[3].xyxy; 9: ADD temp[3].xy, input[0], temp[3]; 10: MUL temp[3].xy, temp[3], const[0].xxxx; 11: ADD temp[2].zw, input[2].xyxy, temp[2]; 12: MUL temp[2].zw, temp[2], const[0].xxxx; 13: MUL temp[9], temp[3], const[2]; 14: TEX temp[4], temp[9], 2D[0]; 15: MUL temp[10], input[1], const[2]; 16: TEX temp[3], temp[10], 2D[0]; 17: MUL temp[11], temp[2].zwzw, const[2]; 18: TEX temp[5], temp[11], 2D[0]; 19: MUL temp[12], temp[2], const[2]; 20: TEX temp[6], temp[12], 2D[0]; 21: MUL temp[13], input[0], const[2]; 22: TEX temp[2], temp[13], 2D[0]; 23: MUL temp[3], temp[3], const[0].zzzz; 24: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 25: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 26: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 27: MAD temp[2], temp[6], const[0].wwww, temp[2]; 28: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 29: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MUL temp[7], input[3], const[2]; 1: TEX temp[0], temp[7], 2D[0]; 2: MUL temp[8], input[2], const[2]; 3: TEX temp[1], temp[8], 2D[0]; 4: MOV temp[2].xy, input[2]; 5: ADD temp[2].xy, input[0], temp[2]; 6: MUL temp[2].xy, temp[2], const[0].xxxx; 7: MOV temp[3].xy, input[1]; 8: MOV temp[2].zw, input[3].xyxy; 9: ADD temp[3].xy, input[0], temp[3]; 10: MUL temp[3].xy, temp[3], const[0].xxxx; 11: ADD temp[2].zw, input[2].xyxy, temp[2]; 12: MUL temp[2].zw, temp[2], const[0].xxxx; 13: MUL temp[9], temp[3], const[2]; 14: TEX temp[4], temp[9], 2D[0]; 15: MUL temp[10], input[1], const[2]; 16: TEX temp[3], temp[10], 2D[0]; 17: MUL temp[11], temp[2].zwzw, const[2]; 18: TEX temp[5], temp[11], 2D[0]; 19: MUL temp[12], temp[2], const[2]; 20: TEX temp[6], temp[12], 2D[0]; 21: MUL temp[13], input[0], const[2]; 22: TEX temp[2], temp[13], 2D[0]; 23: MUL temp[3], temp[3], const[0].zzzz; 24: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 25: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 26: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 27: MAD temp[2], temp[6], const[0].wwww, temp[2]; 28: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 29: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MUL temp[7].xy, input[3].xy__, const[2].xy__; 1: TEX temp[0], temp[7].xy__, 2D[0]; 2: MUL temp[8].xy, input[2].xy__, const[2].xy__; 3: TEX temp[1], temp[8].xy__, 2D[0]; 4: MOV temp[2].xy, input[2].xy__; 5: ADD temp[2].xy, input[0].xy__, temp[2].xy__; 6: MUL temp[2].xy, temp[2].xy__, const[0].xx__; 7: MOV temp[3].xy, input[1].xy__; 8: MOV temp[2].zw, input[3].__xy; 9: ADD temp[3].xy, input[0].xy__, temp[3].xy__; 10: MUL temp[3].xy, temp[3].xy__, const[0].xx__; 11: ADD temp[2].zw, input[2].__xy, temp[2].__zw; 12: MUL temp[2].zw, temp[2].__zw, const[0].__xx; 13: MUL temp[9].xy, temp[3].xy__, const[2].xy__; 14: TEX temp[4], temp[9].xy__, 2D[0]; 15: MUL temp[10].xy, input[1].xy__, const[2].xy__; 16: TEX temp[3], temp[10].xy__, 2D[0]; 17: MUL temp[11].xy, temp[2].zw__, const[2].xy__; 18: TEX temp[5], temp[11].xy__, 2D[0]; 19: MUL temp[12].xy, temp[2].xy__, const[2].xy__; 20: TEX temp[6], temp[12].xy__, 2D[0]; 21: MUL temp[13].xy, input[0].xy__, const[2].xy__; 22: TEX temp[2], temp[13].xy__, 2D[0]; 23: MUL temp[3], temp[3], const[0].zzzz; 24: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 25: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 26: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 27: MAD temp[2], temp[6], const[0].wwww, temp[2]; 28: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 29: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MUL temp[7].xy, input[3].xy__, const[2].xy__; 1: TEX temp[0], temp[7].xy__, 2D[0]; 2: MUL temp[8].xy, input[2].xy__, const[2].xy__; 3: TEX temp[1], temp[8].xy__, 2D[0]; 4: MOV temp[2].xy, input[2].xy__; 5: ADD temp[2].xy, input[0].xy__, temp[2].xy__; 6: MUL temp[2].xy, temp[2].xy__, const[0].xx__; 7: MOV temp[3].xy, input[1].xy__; 8: MOV temp[2].zw, input[3].__xy; 9: ADD temp[3].xy, input[0].xy__, temp[3].xy__; 10: MUL temp[3].xy, temp[3].xy__, const[0].xx__; 11: ADD temp[2].zw, input[2].__xy, temp[2].__zw; 12: MUL temp[2].zw, temp[2].__zw, const[0].__xx; 13: MUL temp[9].xy, temp[3].xy__, const[2].xy__; 14: TEX temp[4], temp[9].xy__, 2D[0]; 15: MUL temp[10].xy, input[1].xy__, const[2].xy__; 16: TEX temp[3], temp[10].xy__, 2D[0]; 17: MUL temp[11].xy, temp[2].zw__, const[2].xy__; 18: TEX temp[5], temp[11].xy__, 2D[0]; 19: MUL temp[12].xy, temp[2].xy__, const[2].xy__; 20: TEX temp[6], temp[12].xy__, 2D[0]; 21: MUL temp[13].xy, input[0].xy__, const[2].xy__; 22: TEX temp[2], temp[13].xy__, 2D[0]; 23: MUL temp[3], temp[3], const[0].zzzz; 24: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 25: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 26: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 27: MAD temp[2], temp[6], const[0].wwww, temp[2]; 28: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 29: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MUL temp[14].xy, input[3].xy__, const[2].xy__; 1: TEX temp[15], temp[14].xy__, 2D[0]; 2: MUL temp[16].xy, input[2].xy__, const[2].xy__; 3: TEX temp[17], temp[16].xy__, 2D[0]; 4: MOV temp[18].xy, input[2].xy__; 5: ADD temp[19].xy, input[0].xy__, temp[18].xy__; 6: MUL temp[20].xy, temp[19].xy__, const[0].xx__; 7: MOV temp[21].xy, input[1].xy__; 8: MOV temp[22].zw, input[3].__xy; 9: ADD temp[23].xy, input[0].xy__, temp[21].xy__; 10: MUL temp[24].xy, temp[23].xy__, const[0].xx__; 11: ADD temp[25].zw, input[2].__xy, temp[22].__zw; 12: MUL temp[26].zw, temp[25].__zw, const[0].__xx; 13: MUL temp[27].xy, temp[24].xy__, const[2].xy__; 14: TEX temp[28], temp[27].xy__, 2D[0]; 15: MUL temp[29].xy, input[1].xy__, const[2].xy__; 16: TEX temp[30], temp[29].xy__, 2D[0]; 17: MUL temp[31].xy, temp[26].zw__, const[2].xy__; 18: TEX temp[32], temp[31].xy__, 2D[0]; 19: MUL temp[33].xy, temp[20].xy__, const[2].xy__; 20: TEX temp[34], temp[33].xy__, 2D[0]; 21: MUL temp[35].xy, input[0].xy__, const[2].xy__; 22: TEX temp[36], temp[35].xy__, 2D[0]; 23: MUL temp[37], temp[30], const[0].zzzz; 24: MAD temp[38], temp[36], const[0].yyyy, temp[37]; 25: MAD temp[39], temp[17], const[0].yyyy, temp[38]; 26: MAD temp[40], temp[15], const[0].zzzz, temp[39]; 27: MAD temp[41], temp[34], const[0].wwww, temp[40]; 28: MAD temp[42], temp[32], const[1].xxxx, temp[41]; 29: MAD output[0], temp[28], const[1].xxxx, temp[42]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MUL temp[14].xy, input[3].xy__, const[2].xy__; 1: TEX temp[15], temp[14].xy__, 2D[0]; 2: MUL temp[16].xy, input[2].xy__, const[2].xy__; 3: TEX temp[17], temp[16].xy__, 2D[0]; 4: MUL temp[20].xy, (input[2] + input[0]).xy__, none.HH__; 5: MUL temp[24].xy, (input[1] + input[0]).xy__, none.HH__; 6: MUL temp[26].zw, (input[3] + input[2]).__xy, none.__HH; 7: MUL temp[27].xy, temp[24].xy__, const[2].xy__; 8: TEX temp[28], temp[27].xy__, 2D[0]; 9: MUL temp[29].xy, input[1].xy__, const[2].xy__; 10: TEX temp[30], temp[29].xy__, 2D[0]; 11: MUL temp[31].xy, temp[26].zw__, const[2].xy__; 12: TEX temp[32], temp[31].xy__, 2D[0]; 13: MUL temp[33].xy, temp[20].xy__, const[2].xy__; 14: TEX temp[34], temp[33].xy__, 2D[0]; 15: MUL temp[35].xy, input[0].xy__, const[2].xy__; 16: TEX temp[36], temp[35].xy__, 2D[0]; 17: MUL temp[37], temp[30], const[0].zzzz; 18: MAD temp[38], temp[36], const[0].yyyy, temp[37]; 19: MAD temp[39], temp[17], const[0].yyyy, temp[38]; 20: MAD temp[40], temp[15], const[0].zzzz, temp[39]; 21: MAD temp[41], temp[34], const[0].wwww, temp[40]; 22: MAD temp[42], temp[32], const[1].xxxx, temp[41]; 23: MAD output[0], temp[28], const[1].xxxx, temp[42]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MUL temp[14].xy, input[3].xy__, const[2].xy__; 1: TEX temp[15], temp[14].xy__, 2D[0]; 2: MUL temp[16].xy, input[2].xy__, const[2].xy__; 3: TEX temp[17], temp[16].xy__, 2D[0]; 4: MUL temp[20].xy, (input[2] + input[0]).xy__, none.HH__; 5: MUL temp[24].xy, (input[1] + input[0]).xy__, none.HH__; 6: MUL temp[26].zw, (input[3] + input[2]).__xy, none.__HH; 7: MUL temp[27].xy, temp[24].xy__, const[2].xy__; 8: TEX temp[28], temp[27].xy__, 2D[0]; 9: MUL temp[29].xy, input[1].xy__, const[2].xy__; 10: TEX temp[30], temp[29].xy__, 2D[0]; 11: MOV temp[0].x, temp[26].z___; 12: MOV temp[0].y, temp[26]._w__; 13: MUL temp[31].xy, temp[0].xy__, const[2].xy__; 14: TEX temp[32], temp[31].xy__, 2D[0]; 15: MUL temp[33].xy, temp[20].xy__, const[2].xy__; 16: TEX temp[34], temp[33].xy__, 2D[0]; 17: MUL temp[35].xy, input[0].xy__, const[2].xy__; 18: TEX temp[36], temp[35].xy__, 2D[0]; 19: MUL temp[37], temp[30], const[0].zzzz; 20: MAD temp[38], temp[36], const[0].yyyy, temp[37]; 21: MAD temp[39], temp[17], const[0].yyyy, temp[38]; 22: MAD temp[40], temp[15], const[0].zzzz, temp[39]; 23: MAD temp[41], temp[34], const[0].wwww, temp[40]; 24: MAD temp[42], temp[32], const[1].xxxx, temp[41]; 25: MAD output[0], temp[28], const[1].xxxx, temp[42]; CONST[0] = { 0.5000 0.1500 0.0750 0.3500 } CONST[1] = { 0.1000 0.0000 1.0000 0.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MUL temp[14].xy, input[3].xy__, const[2].xy__; 1: TEX temp[15], temp[14].xy__, 2D[0]; 2: MUL temp[16].xy, input[2].xy__, const[2].xy__; 3: TEX temp[17], temp[16].xy__, 2D[0]; 4: MUL temp[20].xy, (input[2] + input[0]).xy__, none.HH__; 5: MUL temp[24].xy, (input[1] + input[0]).xy__, none.HH__; 6: MUL temp[26].zw, (input[3] + input[2]).__xy, none.__HH; 7: MUL temp[27].xy, temp[24].xy__, const[2].xy__; 8: TEX temp[28], temp[27].xy__, 2D[0]; 9: MUL temp[29].xy, input[1].xy__, const[2].xy__; 10: TEX temp[30], temp[29].xy__, 2D[0]; 11: MOV temp[0].x, temp[26].z___; 12: MOV temp[0].y, temp[26]._w__; 13: MUL temp[31].xy, temp[0].xy__, const[2].xy__; 14: TEX temp[32], temp[31].xy__, 2D[0]; 15: MUL temp[33].xy, temp[20].xy__, const[2].xy__; 16: TEX temp[34], temp[33].xy__, 2D[0]; 17: MUL temp[35].xy, input[0].xy__, const[2].xy__; 18: TEX temp[36], temp[35].xy__, 2D[0]; 19: MUL temp[37], temp[30], const[0].zzzz; 20: MAD temp[38], temp[36], const[0].yyyy, temp[37]; 21: MAD temp[39], temp[17], const[0].yyyy, temp[38]; 22: MAD temp[40], temp[15], const[0].zzzz, temp[39]; 23: MAD temp[41], temp[34], const[0].wwww, temp[40]; 24: MAD temp[42], temp[32], const[1].xxxx, temp[41]; 25: MAD output[0], temp[28], const[1].xxxx, temp[42]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: src0.xyz = input[3], src1.xyz = const[2] MAD temp[14].xy, src0.xy_, src1.xy_, src0.000 1: TEX temp[15], temp[14].xy__, 2D[0]; 2: src0.xyz = input[2], src1.xyz = const[2] MAD temp[16].xy, src0.xy_, src1.xy_, src0.000 3: TEX temp[17], temp[16].xy__, 2D[0]; 4: src0.xyz = input[0], src1.xyz = input[2], srcp.xyz = (src1 + src0) MAD temp[20].xy, srcp.xy_, src0.HH_, src0.000 5: src0.xyz = input[0], src1.xyz = input[1], srcp.xyz = (src1 + src0) MAD temp[24].xy, srcp.xy_, src0.HH_, src0.000 6: src0.xyz = input[2], src1.xyz = input[3], srcp.xyz = (src1 + src0) MAD temp[26].z, srcp.__x, src0.__H, src0.000 MAD temp[26].w, srcp.y, src0.H, src0.0 7: src0.xyz = temp[24], src1.xyz = const[2] MAD temp[27].xy, src0.xy_, src1.xy_, src0.000 8: TEX temp[28], temp[27].xy__, 2D[0]; 9: src0.xyz = input[1], src1.xyz = const[2] MAD temp[29].xy, src0.xy_, src1.xy_, src0.000 10: TEX temp[30], temp[29].xy__, 2D[0]; 11: src0.xyz = temp[26] MAD temp[0].x, src0.z__, src0.111, src0.000 12: src0.w = temp[26] MAD temp[0].y, src0._w_, src0.111, src0.000 13: src0.xyz = temp[0], src1.xyz = const[2] MAD temp[31].xy, src0.xy_, src1.xy_, src0.000 14: TEX temp[32], temp[31].xy__, 2D[0]; 15: src0.xyz = temp[20], src1.xyz = const[2] MAD temp[33].xy, src0.xy_, src1.xy_, src0.000 16: TEX temp[34], temp[33].xy__, 2D[0]; 17: src0.xyz = input[0], src1.xyz = const[2] MAD temp[35].xy, src0.xy_, src1.xy_, src0.000 18: TEX temp[36], temp[35].xy__, 2D[0]; 19: src0.xyz = temp[30], src0.w = temp[30], src1.xyz = const[0] MAD temp[37].xyz, src0.xyz, src1.zzz, src0.000 MAD temp[37].w, src0.w, src1.z, src0.0 20: src0.xyz = temp[36], src0.w = temp[36], src1.xyz = const[0], src1.w = temp[37], src2.xyz = temp[37] MAD temp[38].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[38].w, src0.w, src1.y, src1.w 21: src0.xyz = temp[17], src0.w = temp[17], src1.xyz = const[0], src1.w = temp[38], src2.xyz = temp[38] MAD temp[39].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[39].w, src0.w, src1.y, src1.w 22: src0.xyz = temp[15], src0.w = temp[15], src1.xyz = const[0], src1.w = temp[39], src2.xyz = temp[39] MAD temp[40].xyz, src0.xyz, src1.zzz, src2.xyz MAD temp[40].w, src0.w, src1.z, src1.w 23: src0.xyz = temp[34], src0.w = temp[34], src1.xyz = temp[40], src1.w = const[0], src2.w = temp[40] MAD temp[41].xyz, src0.xyz, src1.www, src1.xyz MAD temp[41].w, src0.w, src1.w, src2.w 24: src0.xyz = temp[32], src0.w = temp[32], src1.xyz = const[1], src1.w = temp[41], src2.xyz = temp[41] MAD temp[42].xyz, src0.xyz, src1.xxx, src2.xyz MAD temp[42].w, src0.w, src1.x, src1.w 25: src0.xyz = temp[28], src0.w = temp[28], src1.xyz = const[1], src1.w = temp[42], src2.xyz = temp[42] MAD color[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD color[0].w, src0.w, src1.x, src1.w Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[2], src1.xyz = input[3], srcp.xyz = (src1 + src0) MAD temp[26].z, srcp.__x, src0.__H, src0.000 MAD temp[26].w, srcp.y, src0.H, src0.0 1: src0.xyz = input[0], src1.xyz = input[2], srcp.xyz = (src1 + src0) MAD temp[20].xy, srcp.xy_, src0.HH_, src0.000 2: src0.xyz = input[0], src1.xyz = input[1], srcp.xyz = (src1 + src0) MAD temp[24].xy, srcp.xy_, src0.HH_, src0.000 3: src0.xyz = input[3], src1.xyz = const[2] MAD temp[14].xy, src0.xy_, src1.xy_, src0.000 4: src0.xyz = input[2], src1.xyz = const[2] MAD temp[16].xy, src0.xy_, src1.xy_, src0.000 5: src0.xyz = input[1], src1.xyz = const[2] MAD temp[29].xy, src0.xy_, src1.xy_, src0.000 6: src0.xyz = input[0], src1.xyz = const[2] MAD temp[35].xy, src0.xy_, src1.xy_, src0.000 7: src0.xyz = temp[20], src1.xyz = const[2] MAD temp[33].xy, src0.xy_, src1.xy_, src0.000 8: src0.xyz = temp[24], src1.xyz = const[2] MAD temp[27].xy, src0.xy_, src1.xy_, src0.000 9: src0.xyz = temp[26] MAD temp[0].x, src0.z__, src0.111, src0.000 10: src0.w = temp[26] MAD temp[0].y, src0._w_, src0.111, src0.000 11: src0.xyz = temp[0], src1.xyz = const[2] MAD temp[31].xy, src0.xy_, src1.xy_, src0.000 12: BEGIN_TEX; 13: TEX temp[15], temp[14].xy__, 2D[0]; 14: TEX temp[17], temp[16].xy__, 2D[0]; 15: TEX temp[30], temp[29].xy__, 2D[0]; 16: TEX temp[36], temp[35].xy__, 2D[0]; 17: TEX temp[34], temp[33].xy__, 2D[0]; 18: TEX temp[28], temp[27].xy__, 2D[0]; 19: TEX temp[32], temp[31].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 20: src0.xyz = temp[30], src0.w = temp[30], src1.xyz = const[0] SEM_WAIT MAD temp[37].xyz, src0.xyz, src1.zzz, src0.000 MAD temp[37].w, src0.w, src1.z, src0.0 21: src0.xyz = temp[36], src0.w = temp[36], src1.xyz = const[0], src1.w = temp[37], src2.xyz = temp[37] MAD temp[38].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[38].w, src0.w, src1.y, src1.w 22: src0.xyz = temp[17], src0.w = temp[17], src1.xyz = const[0], src1.w = temp[38], src2.xyz = temp[38] MAD temp[39].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[39].w, src0.w, src1.y, src1.w 23: src0.xyz = temp[15], src0.w = temp[15], src1.xyz = const[0], src1.w = temp[39], src2.xyz = temp[39] MAD temp[40].xyz, src0.xyz, src1.zzz, src2.xyz MAD temp[40].w, src0.w, src1.z, src1.w 24: src0.xyz = temp[34], src0.w = temp[34], src1.xyz = temp[40], src1.w = const[0], src2.w = temp[40] MAD temp[41].xyz, src0.xyz, src1.www, src1.xyz MAD temp[41].w, src0.w, src1.w, src2.w 25: src0.xyz = temp[32], src0.w = temp[32], src1.xyz = const[1], src1.w = temp[41], src2.xyz = temp[41] MAD temp[42].xyz, src0.xyz, src1.xxx, src2.xyz MAD temp[42].w, src0.w, src1.x, src1.w 26: src0.xyz = temp[28], src0.w = temp[28], src1.xyz = const[1], src1.w = temp[42], src2.xyz = temp[42] MAD color[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD color[0].w, src0.w, src1.x, src1.w Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[2], src1.xyz = input[3], srcp.xyz = (src1 + src0) MAD temp[26].z, srcp.__x, src0.__H, src0.000 MAD temp[26].w, srcp.y, src0.H, src0.0 1: src0.xyz = input[0], src1.xyz = input[2], srcp.xyz = (src1 + src0) MAD temp[20].xy, srcp.xy_, src0.HH_, src0.000 2: src0.xyz = input[0], src1.xyz = input[1], srcp.xyz = (src1 + src0) MAD temp[24].xy, srcp.xy_, src0.HH_, src0.000 3: src0.xyz = input[3], src1.xyz = const[2] MAD temp[14].xy, src0.xy_, src1.xy_, src0.000 4: src0.xyz = input[2], src1.xyz = const[2] MAD temp[16].xy, src0.xy_, src1.xy_, src0.000 5: src0.xyz = input[1], src1.xyz = const[2] MAD temp[29].xy, src0.xy_, src1.xy_, src0.000 6: src0.xyz = input[0], src1.xyz = const[2] MAD temp[35].xy, src0.xy_, src1.xy_, src0.000 7: src0.xyz = temp[20], src1.xyz = const[2] MAD temp[33].xy, src0.xy_, src1.xy_, src0.000 8: src0.xyz = temp[24], src1.xyz = const[2] MAD temp[27].xy, src0.xy_, src1.xy_, src0.000 9: src0.xyz = temp[26] MAD temp[0].x, src0.z__, src0.111, src0.000 10: src0.w = temp[26] MAD temp[0].y, src0._w_, src0.111, src0.000 11: src0.xyz = temp[0], src1.xyz = const[2] MAD temp[31].xy, src0.xy_, src1.xy_, src0.000 12: BEGIN_TEX; 13: TEX temp[15], temp[14].xy__, 2D[0]; 14: TEX temp[17], temp[16].xy__, 2D[0]; 15: TEX temp[30], temp[29].xy__, 2D[0]; 16: TEX temp[36], temp[35].xy__, 2D[0]; 17: TEX temp[34], temp[33].xy__, 2D[0]; 18: TEX temp[28], temp[27].xy__, 2D[0]; 19: TEX temp[32], temp[31].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 20: src0.xyz = temp[30], src0.w = temp[30], src1.xyz = const[0] SEM_WAIT MAD temp[37].xyz, src0.xyz, src1.zzz, src0.000 MAD temp[37].w, src0.w, src1.z, src0.0 21: src0.xyz = temp[36], src0.w = temp[36], src1.xyz = const[0], src1.w = temp[37], src2.xyz = temp[37] MAD temp[38].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[38].w, src0.w, src1.y, src1.w 22: src0.xyz = temp[17], src0.w = temp[17], src1.xyz = const[0], src1.w = temp[38], src2.xyz = temp[38] MAD temp[39].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[39].w, src0.w, src1.y, src1.w 23: src0.xyz = temp[15], src0.w = temp[15], src1.xyz = const[0], src1.w = temp[39], src2.xyz = temp[39] MAD temp[40].xyz, src0.xyz, src1.zzz, src2.xyz MAD temp[40].w, src0.w, src1.z, src1.w 24: src0.xyz = temp[34], src0.w = temp[34], src1.xyz = temp[40], src1.w = const[0], src2.w = temp[40] MAD temp[41].xyz, src0.xyz, src1.www, src1.xyz MAD temp[41].w, src0.w, src1.w, src2.w 25: src0.xyz = temp[32], src0.w = temp[32], src1.xyz = const[1], src1.w = temp[41], src2.xyz = temp[41] MAD temp[42].xyz, src0.xyz, src1.xxx, src2.xyz MAD temp[42].w, src0.w, src1.x, src1.w 26: src0.xyz = temp[28], src0.w = temp[28], src1.xyz = const[1], src1.w = temp[42], src2.xyz = temp[42] MAD color[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD color[0].w, src0.w, src1.x, src1.w Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[2], src1.xyz = input[3], srcp.xyz = (src1 + src0) MAD temp[0].z, srcp.__x, src0.__H, src0.__0 MAD temp[0].w, srcp.y, src0.H, src0.0 1: src0.xyz = input[0], src1.xyz = input[2], srcp.xyz = (src1 + src0) MAD temp[4].xy, srcp.xy_, src0.HH_, src0.00_ 2: src0.xyz = input[0], src1.xyz = input[1], srcp.xyz = (src1 + src0) MAD temp[5].xy, srcp.xy_, src0.HH_, src0.00_ 3: src0.xyz = input[3], src1.xyz = const[2] MAD temp[3].xy, src0.xy_, src1.xy_, src0.00_ 4: src0.xyz = input[2], src1.xyz = const[2] MAD temp[2].xy, src0.xy_, src1.xy_, src0.00_ 5: src0.xyz = input[1], src1.xyz = const[2] MAD temp[1].xy, src0.xy_, src1.xy_, src0.00_ 6: src0.xyz = input[0], src1.xyz = const[2] MAD temp[0].xy, src0.xy_, src1.xy_, src0.00_ 7: src0.xyz = temp[4], src1.xyz = const[2] MAD temp[4].xy, src0.xy_, src1.xy_, src0.00_ 8: src0.xyz = temp[5], src1.xyz = const[2] MAD temp[5].xy, src0.xy_, src1.xy_, src0.00_ 9: src0.xyz = temp[0] MAD temp[6].x, src0.z__, src0.11_, src0.00_ 10: src0.w = temp[0] MAD temp[6].y, src0._w_, src0.11_, src0.00_ 11: src0.xyz = temp[6], src1.xyz = const[2] MAD temp[6].xy, src0.xy_, src1.xy_, src0.00_ 12: BEGIN_TEX; 13: TEX temp[3], temp[3].xy__, 2D[0]; 14: TEX temp[2], temp[2].xy__, 2D[0]; 15: TEX temp[1], temp[1].xy__, 2D[0]; 16: TEX temp[0], temp[0].xy__, 2D[0]; 17: TEX temp[4], temp[4].xy__, 2D[0]; 18: TEX temp[5], temp[5].xy__, 2D[0]; 19: TEX temp[6], temp[6].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 20: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = const[0] SEM_WAIT MAD temp[1].xyz, src0.xyz, src1.zzz, src0.000 MAD temp[1].w, src0.w, src1.z, src0.0 21: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = const[0], src1.w = temp[1], src2.xyz = temp[1] MAD temp[0].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[0].w, src0.w, src1.y, src1.w 22: src0.xyz = temp[2], src0.w = temp[2], src1.xyz = const[0], src1.w = temp[0], src2.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[0].w, src0.w, src1.y, src1.w 23: src0.xyz = temp[3], src0.w = temp[3], src1.xyz = const[0], src1.w = temp[0], src2.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.zzz, src2.xyz MAD temp[0].w, src0.w, src1.z, src1.w 24: src0.xyz = temp[4], src0.w = temp[4], src1.xyz = temp[0], src1.w = const[0], src2.w = temp[0] MAD temp[0].xyz, src0.xyz, src1.www, src1.xyz MAD temp[0].w, src0.w, src1.w, src2.w 25: src0.xyz = temp[6], src0.w = temp[6], src1.xyz = const[1], src1.w = temp[0], src2.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD temp[0].w, src0.w, src1.x, src1.w 26: src0.xyz = temp[5], src0.w = temp[5], src1.xyz = const[1], src1.w = temp[0], src2.xyz = temp[0] MAD color[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD color[0].w, src0.w, src1.x, src1.w r300compiler error: compiler/r300_fragprog_emit.c::emit_alu(): Too many ALU instructions r300 FP: Compiler Error: compiler/r300_fragprog_emit.c::emit_alu(): Too many ALU instructions Using a dummy shader instead. r300: Initial fragment program FRAG DCL OUT[0], COLOR IMM FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 pc=17************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: 0.0 1.0 0.0 op: 00050a94 w: 1.0 1.0 0.0 op: 00040891 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL IN[2], GENERIC[2], PERSPECTIVE DCL IN[3], GENERIC[3], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..6] IMM FLT32 { 0.5000, 0.1500, 0.0750, 0.3500} IMM FLT32 { 0.1000, 0.0000, 1.0000, 0.0000} 0: TEX TEMP[0], IN[3], SAMP[0], 2D 1: TEX TEMP[1], IN[2], SAMP[0], 2D 2: MOV TEMP[2].xy, IN[2] 3: ADD TEMP[2].xy, IN[0], TEMP[2] 4: MUL TEMP[2].xy, TEMP[2], IMM[0].xxxx 5: MOV TEMP[3].xy, IN[1] 6: MOV TEMP[2].zw, IN[3].xyxy 7: ADD TEMP[3].xy, IN[0], TEMP[3] 8: MUL TEMP[3].xy, TEMP[3], IMM[0].xxxx 9: ADD TEMP[2].zw, IN[2].xyxy, TEMP[2] 10: MUL TEMP[2].zw, TEMP[2], IMM[0].xxxx 11: TEX TEMP[4], TEMP[3], SAMP[0], 2D 12: TEX TEMP[3], IN[1], SAMP[0], 2D 13: TEX TEMP[5], TEMP[2].zwzw, SAMP[0], 2D 14: TEX TEMP[6], TEMP[2], SAMP[0], 2D 15: TEX TEMP[2], IN[0], SAMP[0], 2D 16: MUL TEMP[3], TEMP[3], IMM[0].zzzz 17: MAD TEMP[2], TEMP[2], IMM[0].yyyy, TEMP[3] 18: MAD TEMP[2], TEMP[1], IMM[0].yyyy, TEMP[2] 19: MAD TEMP[2], TEMP[0], IMM[0].zzzz, TEMP[2] 20: MAD TEMP[2], TEMP[6], IMM[0].wwww, TEMP[2] 21: MAD TEMP[2], TEMP[5], IMM[1].xxxx, TEMP[2] 22: MAD OUT[0], TEMP[4], IMM[1].xxxx, TEMP[2] 23: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0], input[3], 2D[0]; 1: TEX temp[1], input[2], 2D[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], 2D[0]; 12: TEX temp[3], input[1], 2D[0]; 13: TEX temp[5], temp[2].zwzw, 2D[0]; 14: TEX temp[6], temp[2], 2D[0]; 15: TEX temp[2], input[0], 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0], input[3], 2D[0]; 1: TEX temp[1], input[2], 2D[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], 2D[0]; 12: TEX temp[3], input[1], 2D[0]; 13: TEX temp[5], temp[2].zwzw, 2D[0]; 14: TEX temp[6], temp[2], 2D[0]; 15: TEX temp[2], input[0], 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0], input[3], 2D[0]; 1: TEX temp[1], input[2], 2D[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], 2D[0]; 12: TEX temp[3], input[1], 2D[0]; 13: TEX temp[5], temp[2].zwzw, 2D[0]; 14: TEX temp[6], temp[2], 2D[0]; 15: TEX temp[2], input[0], 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0], input[3], 2D[0]; 1: TEX temp[1], input[2], 2D[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], 2D[0]; 12: TEX temp[3], input[1], 2D[0]; 13: TEX temp[5], temp[2].zwzw, 2D[0]; 14: TEX temp[6], temp[2], 2D[0]; 15: TEX temp[2], input[0], 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0], input[3], 2D[0]; 1: TEX temp[1], input[2], 2D[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], 2D[0]; 12: TEX temp[3], input[1], 2D[0]; 13: TEX temp[5], temp[2].zwzw, 2D[0]; 14: TEX temp[6], temp[2], 2D[0]; 15: TEX temp[2], input[0], 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[0], input[3], 2D[0]; 1: TEX temp[1], input[2], 2D[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], 2D[0]; 12: TEX temp[3], input[1], 2D[0]; 13: TEX temp[5], temp[2].zwzw, 2D[0]; 14: TEX temp[6], temp[2], 2D[0]; 15: TEX temp[2], input[0], 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[0], input[3], 2D[0]; 1: TEX temp[1], input[2], 2D[0]; 2: MOV temp[2].xy, input[2]; 3: ADD temp[2].xy, input[0], temp[2]; 4: MUL temp[2].xy, temp[2], const[0].xxxx; 5: MOV temp[3].xy, input[1]; 6: MOV temp[2].zw, input[3].xyxy; 7: ADD temp[3].xy, input[0], temp[3]; 8: MUL temp[3].xy, temp[3], const[0].xxxx; 9: ADD temp[2].zw, input[2].xyxy, temp[2]; 10: MUL temp[2].zw, temp[2], const[0].xxxx; 11: TEX temp[4], temp[3], 2D[0]; 12: TEX temp[3], input[1], 2D[0]; 13: TEX temp[5], temp[2].zwzw, 2D[0]; 14: TEX temp[6], temp[2], 2D[0]; 15: TEX temp[2], input[0], 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[0], input[3].xy__, 2D[0]; 1: TEX temp[1], input[2].xy__, 2D[0]; 2: MOV temp[2].xy, input[2].xy__; 3: ADD temp[2].xy, input[0].xy__, temp[2].xy__; 4: MUL temp[2].xy, temp[2].xy__, const[0].xx__; 5: MOV temp[3].xy, input[1].xy__; 6: MOV temp[2].zw, input[3].__xy; 7: ADD temp[3].xy, input[0].xy__, temp[3].xy__; 8: MUL temp[3].xy, temp[3].xy__, const[0].xx__; 9: ADD temp[2].zw, input[2].__xy, temp[2].__zw; 10: MUL temp[2].zw, temp[2].__zw, const[0].__xx; 11: TEX temp[4], temp[3].xy__, 2D[0]; 12: TEX temp[3], input[1].xy__, 2D[0]; 13: TEX temp[5], temp[2].zw__, 2D[0]; 14: TEX temp[6], temp[2].xy__, 2D[0]; 15: TEX temp[2], input[0].xy__, 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[0], input[3].xy__, 2D[0]; 1: TEX temp[1], input[2].xy__, 2D[0]; 2: MOV temp[2].xy, input[2].xy__; 3: ADD temp[2].xy, input[0].xy__, temp[2].xy__; 4: MUL temp[2].xy, temp[2].xy__, const[0].xx__; 5: MOV temp[3].xy, input[1].xy__; 6: MOV temp[2].zw, input[3].__xy; 7: ADD temp[3].xy, input[0].xy__, temp[3].xy__; 8: MUL temp[3].xy, temp[3].xy__, const[0].xx__; 9: ADD temp[2].zw, input[2].__xy, temp[2].__zw; 10: MUL temp[2].zw, temp[2].__zw, const[0].__xx; 11: TEX temp[4], temp[3].xy__, 2D[0]; 12: TEX temp[3], input[1].xy__, 2D[0]; 13: TEX temp[5], temp[2].zw__, 2D[0]; 14: TEX temp[6], temp[2].xy__, 2D[0]; 15: TEX temp[2], input[0].xy__, 2D[0]; 16: MUL temp[3], temp[3], const[0].zzzz; 17: MAD temp[2], temp[2], const[0].yyyy, temp[3]; 18: MAD temp[2], temp[1], const[0].yyyy, temp[2]; 19: MAD temp[2], temp[0], const[0].zzzz, temp[2]; 20: MAD temp[2], temp[6], const[0].wwww, temp[2]; 21: MAD temp[2], temp[5], const[1].xxxx, temp[2]; 22: MAD output[0], temp[4], const[1].xxxx, temp[2]; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[7], input[3].xy__, 2D[0]; 1: TEX temp[8], input[2].xy__, 2D[0]; 2: MOV temp[9].xy, input[2].xy__; 3: ADD temp[10].xy, input[0].xy__, temp[9].xy__; 4: MUL temp[11].xy, temp[10].xy__, const[0].xx__; 5: MOV temp[12].xy, input[1].xy__; 6: MOV temp[13].zw, input[3].__xy; 7: ADD temp[14].xy, input[0].xy__, temp[12].xy__; 8: MUL temp[15].xy, temp[14].xy__, const[0].xx__; 9: ADD temp[16].zw, input[2].__xy, temp[13].__zw; 10: MUL temp[17].zw, temp[16].__zw, const[0].__xx; 11: TEX temp[18], temp[15].xy__, 2D[0]; 12: TEX temp[19], input[1].xy__, 2D[0]; 13: TEX temp[20], temp[17].zw__, 2D[0]; 14: TEX temp[21], temp[11].xy__, 2D[0]; 15: TEX temp[22], input[0].xy__, 2D[0]; 16: MUL temp[23], temp[19], const[0].zzzz; 17: MAD temp[24], temp[22], const[0].yyyy, temp[23]; 18: MAD temp[25], temp[8], const[0].yyyy, temp[24]; 19: MAD temp[26], temp[7], const[0].zzzz, temp[25]; 20: MAD temp[27], temp[21], const[0].wwww, temp[26]; 21: MAD temp[28], temp[20], const[1].xxxx, temp[27]; 22: MAD output[0], temp[18], const[1].xxxx, temp[28]; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[7], input[3].xy__, 2D[0]; 1: TEX temp[8], input[2].xy__, 2D[0]; 2: MUL temp[11].xy, (input[2] + input[0]).xy__, none.HH__; 3: MUL temp[15].xy, (input[1] + input[0]).xy__, none.HH__; 4: MUL temp[17].zw, (input[3] + input[2]).__xy, none.__HH; 5: TEX temp[18], temp[15].xy__, 2D[0]; 6: TEX temp[19], input[1].xy__, 2D[0]; 7: TEX temp[20], temp[17].zw__, 2D[0]; 8: TEX temp[21], temp[11].xy__, 2D[0]; 9: TEX temp[22], input[0].xy__, 2D[0]; 10: MUL temp[23], temp[19], const[0].zzzz; 11: MAD temp[24], temp[22], const[0].yyyy, temp[23]; 12: MAD temp[25], temp[8], const[0].yyyy, temp[24]; 13: MAD temp[26], temp[7], const[0].zzzz, temp[25]; 14: MAD temp[27], temp[21], const[0].wwww, temp[26]; 15: MAD temp[28], temp[20], const[1].xxxx, temp[27]; 16: MAD output[0], temp[18], const[1].xxxx, temp[28]; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[7], input[3].xy__, 2D[0]; 1: TEX temp[8], input[2].xy__, 2D[0]; 2: MUL temp[11].xy, (input[2] + input[0]).xy__, none.HH__; 3: MUL temp[15].xy, (input[1] + input[0]).xy__, none.HH__; 4: MUL temp[17].zw, (input[3] + input[2]).__xy, none.__HH; 5: TEX temp[18], temp[15].xy__, 2D[0]; 6: TEX temp[19], input[1].xy__, 2D[0]; 7: MOV temp[0].x, temp[17].z___; 8: MOV temp[0].y, temp[17]._w__; 9: TEX temp[20], temp[0].xy__, 2D[0]; 10: TEX temp[21], temp[11].xy__, 2D[0]; 11: TEX temp[22], input[0].xy__, 2D[0]; 12: MUL temp[23], temp[19], const[0].zzzz; 13: MAD temp[24], temp[22], const[0].yyyy, temp[23]; 14: MAD temp[25], temp[8], const[0].yyyy, temp[24]; 15: MAD temp[26], temp[7], const[0].zzzz, temp[25]; 16: MAD temp[27], temp[21], const[0].wwww, temp[26]; 17: MAD temp[28], temp[20], const[1].xxxx, temp[27]; 18: MAD output[0], temp[18], const[1].xxxx, temp[28]; CONST[0] = { 0.5000 0.1500 0.0750 0.3500 } CONST[1] = { 0.1000 0.0000 1.0000 0.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[7], input[3].xy__, 2D[0]; 1: TEX temp[8], input[2].xy__, 2D[0]; 2: MUL temp[11].xy, (input[2] + input[0]).xy__, none.HH__; 3: MUL temp[15].xy, (input[1] + input[0]).xy__, none.HH__; 4: MUL temp[17].zw, (input[3] + input[2]).__xy, none.__HH; 5: TEX temp[18], temp[15].xy__, 2D[0]; 6: TEX temp[19], input[1].xy__, 2D[0]; 7: MOV temp[0].x, temp[17].z___; 8: MOV temp[0].y, temp[17]._w__; 9: TEX temp[20], temp[0].xy__, 2D[0]; 10: TEX temp[21], temp[11].xy__, 2D[0]; 11: TEX temp[22], input[0].xy__, 2D[0]; 12: MUL temp[23], temp[19], const[0].zzzz; 13: MAD temp[24], temp[22], const[0].yyyy, temp[23]; 14: MAD temp[25], temp[8], const[0].yyyy, temp[24]; 15: MAD temp[26], temp[7], const[0].zzzz, temp[25]; 16: MAD temp[27], temp[21], const[0].wwww, temp[26]; 17: MAD temp[28], temp[20], const[1].xxxx, temp[27]; 18: MAD output[0], temp[18], const[1].xxxx, temp[28]; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[7], input[3].xy__, 2D[0]; 1: TEX temp[8], input[2].xy__, 2D[0]; 2: src0.xyz = input[0], src1.xyz = input[2], srcp.xyz = (src1 + src0) MAD temp[11].xy, srcp.xy_, src0.HH_, src0.000 3: src0.xyz = input[0], src1.xyz = input[1], srcp.xyz = (src1 + src0) MAD temp[15].xy, srcp.xy_, src0.HH_, src0.000 4: src0.xyz = input[2], src1.xyz = input[3], srcp.xyz = (src1 + src0) MAD temp[17].z, srcp.__x, src0.__H, src0.000 MAD temp[17].w, srcp.y, src0.H, src0.0 5: TEX temp[18], temp[15].xy__, 2D[0]; 6: TEX temp[19], input[1].xy__, 2D[0]; 7: src0.xyz = temp[17] MAD temp[0].x, src0.z__, src0.111, src0.000 8: src0.w = temp[17] MAD temp[0].y, src0._w_, src0.111, src0.000 9: TEX temp[20], temp[0].xy__, 2D[0]; 10: TEX temp[21], temp[11].xy__, 2D[0]; 11: TEX temp[22], input[0].xy__, 2D[0]; 12: src0.xyz = temp[19], src0.w = temp[19], src1.xyz = const[0] MAD temp[23].xyz, src0.xyz, src1.zzz, src0.000 MAD temp[23].w, src0.w, src1.z, src0.0 13: src0.xyz = temp[22], src0.w = temp[22], src1.xyz = const[0], src1.w = temp[23], src2.xyz = temp[23] MAD temp[24].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[24].w, src0.w, src1.y, src1.w 14: src0.xyz = temp[8], src0.w = temp[8], src1.xyz = const[0], src1.w = temp[24], src2.xyz = temp[24] MAD temp[25].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[25].w, src0.w, src1.y, src1.w 15: src0.xyz = temp[7], src0.w = temp[7], src1.xyz = const[0], src1.w = temp[25], src2.xyz = temp[25] MAD temp[26].xyz, src0.xyz, src1.zzz, src2.xyz MAD temp[26].w, src0.w, src1.z, src1.w 16: src0.xyz = temp[21], src0.w = temp[21], src1.xyz = temp[26], src1.w = const[0], src2.w = temp[26] MAD temp[27].xyz, src0.xyz, src1.www, src1.xyz MAD temp[27].w, src0.w, src1.w, src2.w 17: src0.xyz = temp[20], src0.w = temp[20], src1.xyz = const[1], src1.w = temp[27], src2.xyz = temp[27] MAD temp[28].xyz, src0.xyz, src1.xxx, src2.xyz MAD temp[28].w, src0.w, src1.x, src1.w 18: src0.xyz = temp[18], src0.w = temp[18], src1.xyz = const[1], src1.w = temp[28], src2.xyz = temp[28] MAD color[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD color[0].w, src0.w, src1.x, src1.w Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[2], src1.xyz = input[3], srcp.xyz = (src1 + src0) MAD temp[17].z, srcp.__x, src0.__H, src0.000 MAD temp[17].w, srcp.y, src0.H, src0.0 1: src0.xyz = input[0], src1.xyz = input[2], srcp.xyz = (src1 + src0) MAD temp[11].xy, srcp.xy_, src0.HH_, src0.000 2: src0.xyz = input[0], src1.xyz = input[1], srcp.xyz = (src1 + src0) MAD temp[15].xy, srcp.xy_, src0.HH_, src0.000 3: src0.xyz = temp[17] MAD temp[0].x, src0.z__, src0.111, src0.000 4: src0.w = temp[17] MAD temp[0].y, src0._w_, src0.111, src0.000 5: BEGIN_TEX; 6: TEX temp[7], input[3].xy__, 2D[0]; 7: TEX temp[8], input[2].xy__, 2D[0]; 8: TEX temp[19], input[1].xy__, 2D[0]; 9: TEX temp[22], input[0].xy__, 2D[0]; 10: TEX temp[21], temp[11].xy__, 2D[0]; 11: TEX temp[18], temp[15].xy__, 2D[0]; 12: TEX temp[20], temp[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 13: src0.xyz = temp[19], src0.w = temp[19], src1.xyz = const[0] SEM_WAIT MAD temp[23].xyz, src0.xyz, src1.zzz, src0.000 MAD temp[23].w, src0.w, src1.z, src0.0 14: src0.xyz = temp[22], src0.w = temp[22], src1.xyz = const[0], src1.w = temp[23], src2.xyz = temp[23] MAD temp[24].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[24].w, src0.w, src1.y, src1.w 15: src0.xyz = temp[8], src0.w = temp[8], src1.xyz = const[0], src1.w = temp[24], src2.xyz = temp[24] MAD temp[25].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[25].w, src0.w, src1.y, src1.w 16: src0.xyz = temp[7], src0.w = temp[7], src1.xyz = const[0], src1.w = temp[25], src2.xyz = temp[25] MAD temp[26].xyz, src0.xyz, src1.zzz, src2.xyz MAD temp[26].w, src0.w, src1.z, src1.w 17: src0.xyz = temp[21], src0.w = temp[21], src1.xyz = temp[26], src1.w = const[0], src2.w = temp[26] MAD temp[27].xyz, src0.xyz, src1.www, src1.xyz MAD temp[27].w, src0.w, src1.w, src2.w 18: src0.xyz = temp[20], src0.w = temp[20], src1.xyz = const[1], src1.w = temp[27], src2.xyz = temp[27] MAD temp[28].xyz, src0.xyz, src1.xxx, src2.xyz MAD temp[28].w, src0.w, src1.x, src1.w 19: src0.xyz = temp[18], src0.w = temp[18], src1.xyz = const[1], src1.w = temp[28], src2.xyz = temp[28] MAD color[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD color[0].w, src0.w, src1.x, src1.w Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[2], src1.xyz = input[3], srcp.xyz = (src1 + src0) MAD temp[17].z, srcp.__x, src0.__H, src0.000 MAD temp[17].w, srcp.y, src0.H, src0.0 1: src0.xyz = input[0], src1.xyz = input[2], srcp.xyz = (src1 + src0) MAD temp[11].xy, srcp.xy_, src0.HH_, src0.000 2: src0.xyz = input[0], src1.xyz = input[1], srcp.xyz = (src1 + src0) MAD temp[15].xy, srcp.xy_, src0.HH_, src0.000 3: src0.xyz = temp[17] MAD temp[0].x, src0.z__, src0.111, src0.000 4: src0.w = temp[17] MAD temp[0].y, src0._w_, src0.111, src0.000 5: BEGIN_TEX; 6: TEX temp[7], input[3].xy__, 2D[0]; 7: TEX temp[8], input[2].xy__, 2D[0]; 8: TEX temp[19], input[1].xy__, 2D[0]; 9: TEX temp[22], input[0].xy__, 2D[0]; 10: TEX temp[21], temp[11].xy__, 2D[0]; 11: TEX temp[18], temp[15].xy__, 2D[0]; 12: TEX temp[20], temp[0].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 13: src0.xyz = temp[19], src0.w = temp[19], src1.xyz = const[0] SEM_WAIT MAD temp[23].xyz, src0.xyz, src1.zzz, src0.000 MAD temp[23].w, src0.w, src1.z, src0.0 14: src0.xyz = temp[22], src0.w = temp[22], src1.xyz = const[0], src1.w = temp[23], src2.xyz = temp[23] MAD temp[24].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[24].w, src0.w, src1.y, src1.w 15: src0.xyz = temp[8], src0.w = temp[8], src1.xyz = const[0], src1.w = temp[24], src2.xyz = temp[24] MAD temp[25].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[25].w, src0.w, src1.y, src1.w 16: src0.xyz = temp[7], src0.w = temp[7], src1.xyz = const[0], src1.w = temp[25], src2.xyz = temp[25] MAD temp[26].xyz, src0.xyz, src1.zzz, src2.xyz MAD temp[26].w, src0.w, src1.z, src1.w 17: src0.xyz = temp[21], src0.w = temp[21], src1.xyz = temp[26], src1.w = const[0], src2.w = temp[26] MAD temp[27].xyz, src0.xyz, src1.www, src1.xyz MAD temp[27].w, src0.w, src1.w, src2.w 18: src0.xyz = temp[20], src0.w = temp[20], src1.xyz = const[1], src1.w = temp[27], src2.xyz = temp[27] MAD temp[28].xyz, src0.xyz, src1.xxx, src2.xyz MAD temp[28].w, src0.w, src1.x, src1.w 19: src0.xyz = temp[18], src0.w = temp[18], src1.xyz = const[1], src1.w = temp[28], src2.xyz = temp[28] MAD color[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD color[0].w, src0.w, src1.x, src1.w Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[2], src1.xyz = input[3], srcp.xyz = (src1 + src0) MAD temp[0].z, srcp.__x, src0.__H, src0.__0 MAD temp[0].w, srcp.y, src0.H, src0.0 1: src0.xyz = input[0], src1.xyz = input[2], srcp.xyz = (src1 + src0) MAD temp[4].xy, srcp.xy_, src0.HH_, src0.00_ 2: src0.xyz = input[0], src1.xyz = input[1], srcp.xyz = (src1 + src0) MAD temp[5].xy, srcp.xy_, src0.HH_, src0.00_ 3: src0.xyz = temp[0] MAD temp[6].x, src0.z__, src0.11_, src0.00_ 4: src0.w = temp[0] MAD temp[6].y, src0._w_, src0.11_, src0.00_ 5: BEGIN_TEX; 6: TEX temp[3], input[3].xy__, 2D[0]; 7: TEX temp[2], input[2].xy__, 2D[0]; 8: TEX temp[1], input[1].xy__, 2D[0]; 9: TEX temp[0], input[0].xy__, 2D[0]; 10: TEX temp[4], temp[4].xy__, 2D[0]; 11: TEX temp[5], temp[5].xy__, 2D[0]; 12: TEX temp[6], temp[6].xy__, 2D[0] SEM_WAIT SEM_ACQUIRE; 13: src0.xyz = temp[1], src0.w = temp[1], src1.xyz = const[0] SEM_WAIT MAD temp[1].xyz, src0.xyz, src1.zzz, src0.000 MAD temp[1].w, src0.w, src1.z, src0.0 14: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = const[0], src1.w = temp[1], src2.xyz = temp[1] MAD temp[0].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[0].w, src0.w, src1.y, src1.w 15: src0.xyz = temp[2], src0.w = temp[2], src1.xyz = const[0], src1.w = temp[0], src2.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.yyy, src2.xyz MAD temp[0].w, src0.w, src1.y, src1.w 16: src0.xyz = temp[3], src0.w = temp[3], src1.xyz = const[0], src1.w = temp[0], src2.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.zzz, src2.xyz MAD temp[0].w, src0.w, src1.z, src1.w 17: src0.xyz = temp[4], src0.w = temp[4], src1.xyz = temp[0], src1.w = const[0], src2.w = temp[0] MAD temp[0].xyz, src0.xyz, src1.www, src1.xyz MAD temp[0].w, src0.w, src1.w, src2.w 18: src0.xyz = temp[6], src0.w = temp[6], src1.xyz = const[1], src1.w = temp[0], src2.xyz = temp[0] MAD temp[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD temp[0].w, src0.w, src1.x, src1.w 19: src0.xyz = temp[5], src0.w = temp[5], src1.xyz = const[1], src1.w = temp[0], src2.xyz = temp[0] MAD color[0].xyz, src0.xyz, src1.xxx, src2.xyz MAD color[0].w, src0.w, src1.x, src1.w r300compiler error: compiler/r300_fragprog_emit.c::emit_alu(): Too many ALU instructions r300 FP: Compiler Error: compiler/r300_fragprog_emit.c::emit_alu(): Too many ALU instructions Using a dummy shader instead. r300: Initial fragment program FRAG DCL OUT[0], COLOR IMM FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxy 1: END Fragment Program: before compilation # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'register rename' # Radeon Compiler Program 0: MOV output[0], temp[0].0001; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[0], none.0001; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'dead sources' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 Fragment Program: after 'register allocation' # Radeon Compiler Program 0: MAD color[0].xyz, src0.000, src0.111, src0.000 MAD color[0].w, src0.1, src0.1, src0.0 pc=18************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00400000) 0: xyz: t0 t0 t0 bias-> o0.xyz (1c000000) w: t0 t0 t0 bias-> o0.w (01000000) xyz: 0.0 1.0 0.0 op: 00050a94 w: 1.0 1.0 0.0 op: 00040891 r300: Initial vertex program VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL OUT[2], GENERIC[0] DCL OUT[3], GENERIC[1] DCL CONST[0..3] 0: MOV OUT[1].xyz, IN[1] 1: MOV OUT[2].xy, IN[2] 2: MOV OUT[3].xy, IN[3] 3: DP4 OUT[0].w, IN[0], CONST[0] 4: DP4 OUT[0].z, IN[0], CONST[1] 5: DP4 OUT[0].y, IN[0], CONST[2] 6: DP4 OUT[0].x, IN[0], CONST[3] 7: END Vertex Program: before compilation # Radeon Compiler Program 0: MOV output[1].xyz, input[1]; 1: MOV output[2].xy, input[2]; 2: MOV output[3].xy, input[3]; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'transform loops' # Radeon Compiler Program 0: MOV output[1].xyz, input[1]; 1: MOV output[2].xy, input[2]; 2: MOV output[3].xy, input[3]; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'emulate branches' # Radeon Compiler Program 0: MOV output[1].xyz, input[1]; 1: MOV output[2].xy, input[2]; 2: MOV output[3].xy, input[3]; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'emulate negative addressing' # Radeon Compiler Program 0: MOV output[1].xyz, input[1]; 1: MOV output[2].xy, input[2]; 2: MOV output[3].xy, input[3]; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'native rewrite' # Radeon Compiler Program 0: MOV output[1].xyz, input[1]; 1: MOV output[2].xy, input[2]; 2: MOV output[3].xy, input[3]; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'emulate modifiers' # Radeon Compiler Program 0: MOV output[1].xyz, input[1]; 1: MOV output[2].xy, input[2]; 2: MOV output[3].xy, input[3]; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'deadcode' # Radeon Compiler Program 0: MOV output[1].xyz, input[1].xyz_; 1: MOV output[2].xy, input[2].xy__; 2: MOV output[3].xy, input[3].xy__; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'dataflow optimize' # Radeon Compiler Program 0: MOV output[1].xyz, input[1].xyz_; 1: MOV output[2].xy, input[2].xy__; 2: MOV output[3].xy, input[3].xy__; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'source conflict resolve' # Radeon Compiler Program 0: MOV output[1].xyz, input[1].xyz_; 1: MOV output[2].xy, input[2].xy__; 2: MOV output[3].xy, input[3].xy__; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'register allocation' # Radeon Compiler Program 0: MOV output[1].xyz, input[1].xyz_; 1: MOV output[2].xy, input[2].xy__; 2: MOV output[3].xy, input[3].xy__; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Vertex Program: after 'dead constants' # Radeon Compiler Program 0: MOV output[1].xyz, input[1].xyz_; 1: MOV output[2].xy, input[2].xy__; 2: MOV output[3].xy, input[3].xy__; 3: DP4 temp[0].w, input[0], const[0]; 4: DP4 temp[0].z, input[0], const[1]; 5: DP4 temp[0].y, input[0], const[2]; 6: DP4 temp[0].x, input[0], const[3]; 7: MOV output[0], temp[0]; 8: MOV output[4], temp[0]; Final vertex program code: 0: op: 0x00702203 dst: 1o op: VE_ADD src0: 0x01d10021 reg: 1i swiz: X/ Y/ Z/ U src1: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 src2: 0x01248021 reg: 1i swiz: 0/ 0/ 0/ 0 1: op: 0x00304203 dst: 2o op: VE_ADD src0: 0x01f90041 reg: 2i swiz: X/ Y/ U/ U src1: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 src2: 0x01248041 reg: 2i swiz: 0/ 0/ 0/ 0 2: op: 0x00306203 dst: 3o op: VE_ADD src0: 0x01f90061 reg: 3i swiz: X/ Y/ U/ U src1: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 src2: 0x01248061 reg: 3i swiz: 0/ 0/ 0/ 0 3: op: 0x00800001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10002 reg: 0c swiz: X/ Y/ Z/ W src2: 0x01248002 reg: 0c swiz: 0/ 0/ 0/ 0 4: op: 0x00400001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10022 reg: 1c swiz: X/ Y/ Z/ W src2: 0x01248022 reg: 1c swiz: 0/ 0/ 0/ 0 5: op: 0x00200001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10042 reg: 2c swiz: X/ Y/ Z/ W src2: 0x01248042 reg: 2c swiz: 0/ 0/ 0/ 0 6: op: 0x00100001 dst: 0t op: VE_DOT_PRODUCT src0: 0x00d10001 reg: 0i swiz: X/ Y/ Z/ W src1: 0x00d10062 reg: 3c swiz: X/ Y/ Z/ W src2: 0x01248062 reg: 3c swiz: 0/ 0/ 0/ 0 7: op: 0x00f00203 dst: 0o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 8: op: 0x00f08203 dst: 4o op: VE_ADD src0: 0x00d10000 reg: 0t swiz: X/ Y/ Z/ W src1: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 src2: 0x01248000 reg: 0t swiz: 0/ 0/ 0/ 0 Flow Control Ops: 0x00000000 r300: Initial fragment program FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], PERSPECTIVE DCL IN[1], GENERIC[1], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..1] IMM FLT32 { 0.3000, 0.5800, 0.1200, 1.0000} 0: TEX TEMP[0].xyz, IN[0], SAMP[0], 2D 1: TEX TEMP[1].xyz, IN[1], SAMP[1], RECT 2: MUL TEMP[0].xyz, TEMP[0], TEMP[0] 3: DP3 TEMP[0].w, TEMP[0], IMM[0] 4: MAD OUT[0].xyz, TEMP[0], TEMP[0].wwww, TEMP[1] 5: END Fragment Program: before compilation # Radeon Compiler Program 0: TEX temp[0].xyz, input[0], 2D[0]; 1: TEX temp[1].xyz, input[1], RECT[1]; 2: MUL temp[0].xyz, temp[0], temp[0]; 3: DP3 temp[0].w, temp[0], const[0]; 4: MAD output[0].xyz, temp[0], temp[0].wwww, temp[1]; Fragment Program: after 'rewrite depth out' # Radeon Compiler Program 0: TEX temp[0].xyz, input[0], 2D[0]; 1: TEX temp[1].xyz, input[1], RECT[1]; 2: MUL temp[0].xyz, temp[0], temp[0]; 3: DP3 temp[0].w, temp[0], const[0]; 4: MAD output[0].xyz, temp[0], temp[0].wwww, temp[1]; Fragment Program: after 'transform KILP' # Radeon Compiler Program 0: TEX temp[0].xyz, input[0], 2D[0]; 1: TEX temp[1].xyz, input[1], RECT[1]; 2: MUL temp[0].xyz, temp[0], temp[0]; 3: DP3 temp[0].w, temp[0], const[0]; 4: MAD output[0].xyz, temp[0], temp[0].wwww, temp[1]; Fragment Program: after 'transform loops' # Radeon Compiler Program 0: TEX temp[0].xyz, input[0], 2D[0]; 1: TEX temp[1].xyz, input[1], RECT[1]; 2: MUL temp[0].xyz, temp[0], temp[0]; 3: DP3 temp[0].w, temp[0], const[0]; 4: MAD output[0].xyz, temp[0], temp[0].wwww, temp[1]; Fragment Program: after 'emulate branches' # Radeon Compiler Program 0: TEX temp[0].xyz, input[0], 2D[0]; 1: TEX temp[1].xyz, input[1], RECT[1]; 2: MUL temp[0].xyz, temp[0], temp[0]; 3: DP3 temp[0].w, temp[0], const[0]; 4: MAD output[0].xyz, temp[0], temp[0].wwww, temp[1]; Fragment Program: after 'transform TEX' # Radeon Compiler Program 0: TEX temp[2], input[0], 2D[0]; 1: MOV temp[0].xyz, temp[2]; 2: MUL temp[3], input[1], const[1]; 3: TEX temp[4], temp[3], 2D[1]; 4: MOV temp[1].xyz, temp[4]; 5: MUL temp[0].xyz, temp[0], temp[0]; 6: DP3 temp[0].w, temp[0], const[0]; 7: MAD output[0].xyz, temp[0], temp[0].wwww, temp[1]; Fragment Program: after 'native rewrite' # Radeon Compiler Program 0: TEX temp[2], input[0], 2D[0]; 1: MOV temp[0].xyz, temp[2]; 2: MUL temp[3], input[1], const[1]; 3: TEX temp[4], temp[3], 2D[1]; 4: MOV temp[1].xyz, temp[4]; 5: MUL temp[0].xyz, temp[0], temp[0]; 6: DP3 temp[0].w, temp[0], const[0]; 7: MAD output[0].xyz, temp[0], temp[0].wwww, temp[1]; Fragment Program: after 'deadcode' # Radeon Compiler Program 0: TEX temp[2].xyz, input[0].xy__, 2D[0]; 1: MOV temp[0].xyz, temp[2].xyz_; 2: MUL temp[3].xy, input[1].xy__, const[1].xy__; 3: TEX temp[4].xyz, temp[3].xy__, 2D[1]; 4: MOV temp[1].xyz, temp[4].xyz_; 5: MUL temp[0].xyz, temp[0].xyz_, temp[0].xyz_; 6: DP3 temp[0].w, temp[0].xyz_, const[0].xyz_; 7: MAD output[0].xyz, temp[0].xyz_, temp[0].www_, temp[1].xyz_; Fragment Program: after 'emulate loops' # Radeon Compiler Program 0: TEX temp[2].xyz, input[0].xy__, 2D[0]; 1: MOV temp[0].xyz, temp[2].xyz_; 2: MUL temp[3].xy, input[1].xy__, const[1].xy__; 3: TEX temp[4].xyz, temp[3].xy__, 2D[1]; 4: MOV temp[1].xyz, temp[4].xyz_; 5: MUL temp[0].xyz, temp[0].xyz_, temp[0].xyz_; 6: DP3 temp[0].w, temp[0].xyz_, const[0].xyz_; 7: MAD output[0].xyz, temp[0].xyz_, temp[0].www_, temp[1].xyz_; Fragment Program: after 'register rename' # Radeon Compiler Program 0: TEX temp[5].xyz, input[0].xy__, 2D[0]; 1: MOV temp[6].xyz, temp[5].xyz_; 2: MUL temp[7].xy, input[1].xy__, const[1].xy__; 3: TEX temp[8].xyz, temp[7].xy__, 2D[1]; 4: MOV temp[9].xyz, temp[8].xyz_; 5: MUL temp[10].xyz, temp[6].xyz_, temp[6].xyz_; 6: DP3 temp[11].w, temp[10].xyz_, const[0].xyz_; 7: MAD output[0].xyz, temp[10].xyz_, temp[11].www_, temp[9].xyz_; Fragment Program: after 'dataflow optimize' # Radeon Compiler Program 0: TEX temp[5].xyz, input[0].xy__, 2D[0]; 1: MUL temp[7].xy, input[1].xy__, const[1].xy__; 2: TEX temp[8].xyz, temp[7].xy__, 2D[1]; 3: MUL temp[10].xyz, temp[5].xyz_, temp[5].xyz_; 4: DP3 temp[11].w, temp[10].xyz_, const[0].xyz_; 5: MAD output[0].xyz, temp[10].xyz_, temp[11].www_, temp[8].xyz_; Fragment Program: after 'dataflow swizzles' # Radeon Compiler Program 0: TEX temp[5].xyz, input[0].xy__, 2D[0]; 1: MUL temp[7].xy, input[1].xy__, const[1].xy__; 2: TEX temp[8].xyz, temp[7].xy__, 2D[1]; 3: MUL temp[10].xyz, temp[5].xyz_, temp[5].xyz_; 4: DP3 temp[11].w, temp[10].xyz_, const[0].xyz_; 5: MAD output[0].xyz, temp[10].xyz_, temp[11].www_, temp[8].xyz_; CONST[0] = { 0.3000 0.5800 0.1200 1.0000 } Fragment Program: after 'dead constants' # Radeon Compiler Program 0: TEX temp[5].xyz, input[0].xy__, 2D[0]; 1: MUL temp[7].xy, input[1].xy__, const[1].xy__; 2: TEX temp[8].xyz, temp[7].xy__, 2D[1]; 3: MUL temp[10].xyz, temp[5].xyz_, temp[5].xyz_; 4: DP3 temp[11].w, temp[10].xyz_, const[0].xyz_; 5: MAD output[0].xyz, temp[10].xyz_, temp[11].www_, temp[8].xyz_; Fragment Program: after 'pair translate' # Radeon Compiler Program 0: TEX temp[5].xyz, input[0].xy__, 2D[0]; 1: src0.xyz = input[1], src1.xyz = const[1] MAD temp[7].xy, src0.xy_, src1.xy_, src0.000 2: TEX temp[8].xyz, temp[7].xy__, 2D[1]; 3: src0.xyz = temp[5] MAD temp[10].xyz, src0.xyz, src0.xyz, src0.000 4: src0.xyz = temp[10], src1.xyz = const[0] DP3, src0.xyz, src1.xyz DP3 temp[11].w, src0._, src0._ 5: src0.xyz = temp[10], src0.w = temp[11], src1.xyz = temp[8] MAD color[0].xyz, src0.xyz, src0.www, src1.xyz Fragment Program: after 'pair scheduling' # Radeon Compiler Program 0: src0.xyz = input[1], src1.xyz = const[1] MAD temp[7].xy, src0.xy_, src1.xy_, src0.000 1: BEGIN_TEX; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: TEX temp[8].xyz, temp[7].xy__, 2D[1] SEM_WAIT SEM_ACQUIRE; 4: src0.xyz = temp[5] SEM_WAIT MAD temp[10].xyz, src0.xyz, src0.xyz, src0.000 5: src0.xyz = temp[10], src1.xyz = const[0] DP3, src0.xyz, src1.xyz DP3 temp[11].w, src0._, src0._ 6: src0.xyz = temp[10], src0.w = temp[11], src1.xyz = temp[8] MAD color[0].xyz, src0.xyz, src0.www, src1.xyz Fragment Program: after 'dead sources' # Radeon Compiler Program 0: src0.xyz = input[1], src1.xyz = const[1] MAD temp[7].xy, src0.xy_, src1.xy_, src0.000 1: BEGIN_TEX; 2: TEX temp[5].xyz, input[0].xy__, 2D[0]; 3: TEX temp[8].xyz, temp[7].xy__, 2D[1] SEM_WAIT SEM_ACQUIRE; 4: src0.xyz = temp[5] SEM_WAIT MAD temp[10].xyz, src0.xyz, src0.xyz, src0.000 5: src0.xyz = temp[10], src1.xyz = const[0] DP3, src0.xyz, src1.xyz DP3 temp[11].w, src0._, src0._ 6: src0.xyz = temp[10], src0.w = temp[11], src1.xyz = temp[8] MAD color[0].xyz, src0.xyz, src0.www, src1.xyz Fragment Program: after 'register allocation' # Radeon Compiler Program 0: src0.xyz = input[1], src1.xyz = const[1] MAD temp[1].xy, src0.xy_, src1.xy_, src0.00_ 1: BEGIN_TEX; 2: TEX temp[0].xyz, input[0].xy__, 2D[0]; 3: TEX temp[1].xyz, temp[1].xy__, 2D[1] SEM_WAIT SEM_ACQUIRE; 4: src0.xyz = temp[0] SEM_WAIT MAD temp[0].xyz, src0.xyz, src0.xyz, src0.000 5: src0.xyz = temp[0], src1.xyz = const[0] DP3, src0.xyz, src1.xyz DP3 temp[0].w, src0._, src0._ 6: src0.xyz = temp[0], src0.w = temp[0], src1.xyz = temp[1] MAD color[0].xyz, src0.xyz, src0.www, src1.xyz pc=19************************************* Hardware program ---------------- NODE 0: alu_offset: 0, tex_offset: 0, alu_end: 0, tex_end: 0 (code_addr: 00000000) 0: xyz: t1 c1 t0 bias-> t1.xy (01840841) w: t0 t0 t0 bias-> (00000000) xyz: t1.xyz c1.xyz 0.0 op: 00050200 w: t1.x t1.x t1.x op: 00000000 NODE 1: alu_offset: 1, tex_offset: 0, alu_end: 2, tex_end: 1 (code_addr: 00420081) TEX: TEX t0, t0, texture[0] (00008000) TEX t1, t1, texture[1] (00008841) 1: xyz: t0 t0 t0 bias-> t0.xyz (03800000) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t0.xyz 0.0 op: 00050000 w: t0.x t0.x t0.x op: 00000000 2: xyz: t0 c0 t0 bias-> (00000800) w: t0 t0 t0 bias-> t0.w (00800000) xyz: t0.xyz c0.xyz t0.xxx op: 00804200 w: 1.0 1.0 t0.x op: 00800891 3: xyz: t0 t1 t0 bias-> o0.xyz (1c000040) w: t0 t0 t0 bias-> (00000000) xyz: t0.xyz t0.www t1.xyz op: 00010600 w: t0.x t0.x t0.x op: 00000000