> > ===== SHADER #200 ========================================== PS/RS880/R600 ===== ===== SHADER #200 ========================================== PS/RS880/R600 ===== ===== 128 dw ===== 7 gprs ===== 0 stack ======================================== | ===== 128 dw ===== 8 gprs ===== 0 stack ======================================== 0000 0000000e a0080000 ALU 3 @28 0000 0000000e a0080000 ALU 3 @28 0028 80000c00 60006610 1 t: RECIP_IEEE R0.w, R0.w | 0028 80000c00 00806610 1 t: RECIP_IEEE R4.x, R0.w 0030 001fe000 00800210 2 x: MUL_IEEE R4.x, R0.x, PS | 0030 001fe000 00a00210 2 x: MUL_IEEE R5.x, R0.x, PS 0032 801fe400 20800210 y: MUL_IEEE R4.y, R0.y, PS | 0032 801fe400 20a00210 y: MUL_IEEE R5.y, R0.y, PS 0002 00000008 80800000 TEX 1 @16 0002 00000008 80800000 TEX 1 @16 0016 00041110 f00d1005 fc808000 SAMPLE R5.xyzw, R4.xy__, RID:17, SID:1 CT:NNNN | 0016 00051110 f00d1004 fc808000 SAMPLE R4.xyzw, R5.xy__, RID:17, SID:1 CT:NNNN 0004 00000011 a0800000 ALU 33 @34 0004 00000011 a0800000 ALU 33 @34 0034 0080a405 60000010 3 w: ADD R0.w, R5.y, R5.y | 0034 00808404 60c00010 3 w: ADD R6.w, R4.y, R4.y 0036 8000a005 60200010 t: ADD R1.w, R5.x, R5.x | 0036 80008004 60800010 t: ADD R4.w, R4.x, R4.x 0038 001fa0ff 00000010 4 x: ADD R0.x, PS, [0xbf800000 -1].x | 0038 001fa0ff 00800010 4 x: ADD R4.x, PS, [0xbf800000 -1].x 0040 801facfe 20000010 y: ADD R0.y, PV.w, [0xbf800000 -1].x | 0040 801facfe 20800010 y: ADD R4.y, PV.w, [0xbf800000 -1].x 0042 bf800000 0042 bf800000 0044 00000000 00005000 5 x: DOT4 __.x, R0.x, R0.x | 0044 00008004 00805000 5 x: DOT4 __.x, R4.x, R4.x 0046 00800400 20005000 y: DOT4 __.y, R0.y, R0.y | 0046 00808404 20805000 y: DOT4 __.y, R4.y, R4.y 0048 001f00f8 40005000 z: DOT4 __.z, 0, 0 | 0048 001f00f8 40805010 z: DOT4 R4.z, 0, 0 0050 801f00f8 60005010 w: DOT4 R0.w, 0, 0 | 0050 801f00f8 60805000 w: DOT4 __.w, 0, 0 0052 801f30fe e0000010 6 w: ADD_sat R0.w, -PV.x, 1.0 | 0052 801f30fe e0800010 6 w: ADD_sat R4.w, -PV.x, 1.0 0054 80000cfe 60206710 7 t: RECIPSQRT_CLAMPED R1.w, PV.w | 0054 80000cfe 40806710 7 t: RECIPSQRT_CLAMPED R4.z, PV.w 0056 00000001 60400210 8 w: MUL_IEEE R2.w, R1.x, R0.x | 0056 00008001 60c00210 8 w: MUL_IEEE R6.w, R1.x, R4.x 0058 818000ff 60200210 t: MUL_IEEE R1.w, PS, R0.w | 0058 818080ff 60e00210 t: MUL_IEEE R7.w, PS, R4.w 0060 00000401 40a00210 9 z: MUL_IEEE R5.z, R1.y, R0.x | 0060 00008401 40800210 9 z: MUL_IEEE R4.z, R1.y, R4.x 0062 001f1c00 600340ff w: CNDGE R0.w, -R0.w, 0, PS | 0062 001f1c04 608340ff w: CNDGE R4.w, -R4.w, 0, PS 0064 80800002 60228cfe t: MULADD_IEEE R1.w, R2.x, R0.y, PV.w | 0064 80808002 60c28cfe t: MULADD_IEEE R6.w, R2.x, R4.y, PV.w 0066 019fc003 00c280ff 10 x: MULADD_IEEE R6.x, R3.x, PV.w, PS 0066 019fc003 00c280ff 10 x: MULADD_IEEE R6.x, R3.x, PV.w, PS 0068 00000801 60200210 w: MUL_IEEE R1.w, R1.z, R0.x | 0068 00008801 60200210 w: MUL_IEEE R1.w, R1.z, R4.x 0070 80800402 604288fe t: MULADD_IEEE R2.w, R2.y, R0.y, PV.z | 0070 80808402 60e288fe t: MULADD_IEEE R7.w, R2.y, R4.y, PV.z 0072 01800403 20c280ff 11 y: MULADD_IEEE R6.y, R3.y, R0.w, PS | 0072 01808403 20c280ff 11 y: MULADD_IEEE R6.y, R3.y, R4.w, PS 0074 80800802 60228cfe w: MULADD_IEEE R1.w, R2.z, R0.y, PV.w | 0074 80808802 60228cfe w: MULADD_IEEE R1.w, R2.z, R4.y, PV.w 0076 01800803 40c28cfe 12 z: MULADD_IEEE R6.z, R3.z, R0.w, PV.w | 0076 01808803 40c28cfe 12 z: MULADD_IEEE R6.z, R3.z, R4.w, PV.w 0078 800000f8 60c01910 w: MOV R6.w, 0 0078 800000f8 60c01910 w: MOV R6.w, 0 0080 0080c806 00205210 13 x: CUBE R1.x, R6.z, R6.y 0080 0080c806 00205210 13 x: CUBE R1.x, R6.z, R6.y 0082 0000c806 20205210 y: CUBE R1.y, R6.z, R6.x 0082 0000c806 20205210 y: CUBE R1.y, R6.z, R6.x 0084 0100c006 40205210 z: CUBE R1.z, R6.x, R6.z 0084 0100c006 40205210 z: CUBE R1.z, R6.x, R6.z 0086 8100c406 60205210 w: CUBE R1.w, R6.y, R6.z 0086 8100c406 60205210 w: CUBE R1.w, R6.y, R6.z 0088 800008fe 00006611 14 t: RECIP_IEEE R0.x, |PV.z| | 0088 800008fe 00406611 14 t: RECIP_IEEE R2.x, |PV.z| 0090 801fe001 204280fd 15 y: MULADD_IEEE R2.y, R1.x, PS, [0x3fc00000 1.5].x | 0090 801fe001 206280fd 15 y: MULADD_IEEE R3.y, R1.x, PS, [0x3fc00000 1.5].x 0092 3fc00000 0092 3fc00000 0094 00000401 004280fd 16 x: MULADD_IEEE R2.x, R1.y, R0.x, [0x3fc00000 1.5].x | 0094 00004401 006280fd 16 x: MULADD_IEEE R3.x, R1.y, R2.x, [0x3fc00000 1.5].x 0096 80000c01 60401910 w: MOV R2.w, R1.w | 0096 80000c01 60601910 w: MOV R3.w, R1.w 0098 3fc00000 0098 3fc00000 0006 0000000a 80800400 TEX 2 @20 0006 0000000a 80800400 TEX 2 @20 0020 00021210 f00d1001 0c810000 SAMPLE R1.xyzw, R2.xywx, RID:18, SID:2 CT:NNNN | 0020 00031210 f00d1001 0c810000 SAMPLE R1.xyzw, R3.xywx, RID:18, SID:2 CT:NNNN 0024 00041010 f00d1002 fc800000 SAMPLE R2.xyzw, R4.xy__, RID:16, SID:0 CT:NNNN | 0024 00051010 f00d1002 fc800000 SAMPLE R2.xyzw, R5.xy__, RID:16, SID:0 CT:NNNN 0008 80000032 a0340000 ALU 14 @100 KC0[CB0:0-31] 0008 80000032 a0340000 ALU 14 @100 KC0[CB0:0-31] 0100 00002002 60000210 17 w: MUL_IEEE R0.w, R2.x, R1.x | 0100 00002002 60600210 17 w: MUL_IEEE R3.w, R2.x, R1.x 0102 80100002 60600210 t: MUL_IEEE R3.w, R2.x, KC0[0].x | 0102 80100002 60800210 t: MUL_IEEE R4.w, R2.x, KC0[0].x 0104 00802402 40600210 18 z: MUL_IEEE R3.z, R2.y, R1.y 0104 00802402 40600210 18 z: MUL_IEEE R3.z, R2.y, R1.y 0106 00900402 60800210 w: MUL_IEEE R4.w, R2.y, KC0[0].y | 0106 00900402 60a00210 w: MUL_IEEE R5.w, R2.y, KC0[0].y 0108 80108cfe 600680ff t: MULADD_IEEE R0.w, PV.w, KC0[4].x, PS SCL_122 | 0108 80108cfe 606680ff t: MULADD_IEEE R3.w, PV.w, KC0[4].x, PS SCL_122 0110 0010a0ff 00a00210 19 x: MUL_IEEE R5.x, PS, KC0[5].x | 0110 0010a0ff 00800210 19 x: MUL_IEEE R4.x, PS, KC0[5].x 0112 01002802 40200210 z: MUL_IEEE R1.z, R2.z, R1.z 0112 01002802 40200210 z: MUL_IEEE R1.z, R2.z, R1.z 0114 81100802 60000210 w: MUL_IEEE R0.w, R2.z, KC0[0].z | 0114 81100802 60200210 w: MUL_IEEE R1.w, R2.z, KC0[0].z 0116 80108803 60228c04 20 w: MULADD_IEEE R1.w, R3.z, KC0[4].x, R4.w | 0116 80108803 60628c05 20 w: MULADD_IEEE R3.w, R3.z, KC0[4].x, R5.w 0118 0090acfe 20a00210 21 y: MUL_IEEE R5.y, PV.w, KC0[5].y | 0118 0090acfe 20800210 21 y: MUL_IEEE R4.y, PV.w, KC0[5].y 0120 80108801 60028c00 w: MULADD_IEEE R0.w, R1.z, KC0[4].x, R0.w | 0120 80108801 60228c01 w: MULADD_IEEE R1.w, R1.z, KC0[4].x, R1.w 0122 0110acfe 40a00210 22 z: MUL_IEEE R5.z, PV.w, KC0[5].z | 0122 0110acfe 40800210 22 z: MUL_IEEE R4.z, PV.w, KC0[5].z 0124 81000c02 60000210 w: MUL_IEEE R0.w, R2.w, R0.z 0124 81000c02 60000210 w: MUL_IEEE R0.w, R2.w, R0.z 0126 8190acfe 60a00210 23 w: MUL_IEEE R5.w, PV.w, KC0[5].w | 0126 8190acfe 60800210 23 w: MUL_IEEE R4.w, PV.w, KC0[5].w 0010 c0028000 94200688 EXPORT_DONE PIXEL 0 R5.xyzw EOP | 0010 c0020000 94200688 EXPORT_DONE PIXEL 0 R4.xyzw EOP ===== SHADER_END =============================================================== ===== SHADER_END =============================================================== ===== SHADER #200 OPT ====================================== PS/RS880/R600 ===== ===== SHADER #200 OPT ====================================== PS/RS880/R600 ===== ===== 120 dw ===== 5 gprs ===== 0 stack ======================================== ===== 120 dw ===== 5 gprs ===== 0 stack ======================================== 0000 00000006 a0080000 ALU 3 @12 0000 00000006 a0080000 ALU 3 @12 0012 80000c00 6f806610 1 t: RECIP_IEEE T0.w, R0.w 0012 80000c00 6f806610 1 t: RECIP_IEEE T0.w, R0.w 0014 018f8000 00000210 2 x: MUL_IEEE R0.x, R0.x, T0.w 0014 018f8000 00000210 2 x: MUL_IEEE R0.x, R0.x, T0.w 0016 818f8400 20000210 y: MUL_IEEE R0.y, R0.y, T0.w 0016 818f8400 20000210 y: MUL_IEEE R0.y, R0.y, T0.w 0002 0000000a 80800000 TEX 1 @20 0002 0000000a 80800000 TEX 1 @20 0020 00001110 f01f9004 fc808000 SAMPLE R4.xy__, R0.xy__, RID:17, SID:1 CT:NNNN 0020 00001110 f01f9004 fc808000 SAMPLE R4.xy__, R0.xy__, RID:17, SID:1 CT:NNNN 0004 0000000c a0700000 ALU 29 @24 0004 0000000c a0700000 ALU 29 @24 0024 801fa404 600204fd 3 w: MULADD R0.w, R4.y, [0x40000000 2].x, [0xbf800000 -1].y 0024 801fa404 600204fd 3 w: MULADD R0.w, R4.y, [0x40000000 2].x, [0xbf800000 -1].y 0026 40000000 0026 40000000 0027 bf800000 0027 bf800000 0028 801fa004 6fe204fd 4 w: MULADD T3.w, R4.x, [0x40000000 2].x, [0xbf800000 -1].y 0028 801fa004 6fe204fd 4 w: MULADD T3.w, R4.x, [0x40000000 2].x, [0xbf800000 -1].y 0030 40000000 0030 40000000 0031 bf800000 0031 bf800000 0032 018fec7f 00005000 5 x: DOT4 __.x, T3.w, T3.w 0032 018fec7f 00005000 5 x: DOT4 __.x, T3.w, T3.w 0034 01800c00 20085000 y: DOT4 __.y, R0.w, R0.w VEC_120 0034 01800c00 20085000 y: DOT4 __.y, R0.w, R0.w VEC_120 0036 001f00f8 40005000 z: DOT4 __.z, 0, 0 0036 001f00f8 40005000 z: DOT4 __.z, 0, 0 0038 801f00f8 6f805010 w: DOT4 T0.w, 0, 0 0038 801f00f8 6f805010 w: DOT4 T0.w, 0, 0 0040 801f3c7c efc00010 6 w: ADD_sat T2.w, -T0.w, 1.0 0040 801f3c7c efc00010 6 w: ADD_sat T2.w, -T0.w, 1.0 0042 80000c7e 6f806710 7 t: RECIPSQRT_CLAMPED T0.w, T2.w 0042 80000c7e 6f806710 7 t: RECIPSQRT_CLAMPED T0.w, T2.w 0044 018fe001 0f840210 8 x: MUL_IEEE T0.x, R1.x, T3.w VEC_021 0044 018fe001 0f840210 8 x: MUL_IEEE T0.x, R1.x, T3.w VEC_021 0046 018fcc7c 6fa00210 w: MUL_IEEE T1.w, T0.w, T2.w 0046 018fcc7c 6fa00210 w: MUL_IEEE T1.w, T0.w, T2.w 0048 818fe801 6f840210 t: MUL_IEEE T0.w, R1.z, T3.w SCL_122 0048 818fe801 6f840210 t: MUL_IEEE T0.w, R1.z, T3.w SCL_122 0050 01800002 0fa2807c 9 x: MULADD_IEEE T1.x, R2.x, R0.w, T0.x 0050 01800002 0fa2807c 9 x: MULADD_IEEE T1.x, R2.x, R0.w, T0.x 0052 018fe401 2f8c0210 y: MUL_IEEE T0.y, R1.y, T3.w VEC_102 0052 018fe401 2f8c0210 y: MUL_IEEE T0.y, R1.y, T3.w VEC_102 0054 81800802 6f828c7c w: MULADD_IEEE T0.w, R2.z, R0.w, T0.w 0054 81800802 6f828c7c w: MULADD_IEEE T0.w, R2.z, R0.w, T0.w 0056 01800402 0f82847c 10 x: MULADD_IEEE T0.x, R2.y, R0.w, T0.y 0056 01800402 0f82847c 10 x: MULADD_IEEE T0.x, R2.y, R0.w, T0.y 0058 801f1c7e 4f834c7d z: CNDGE T0.z, -T2.w, 0, T1.w 0058 801f1c7e 4f834c7d z: CNDGE T0.z, -T2.w, 0, T1.w 0060 010f8403 0f86807c 11 x: MULADD_IEEE T0.x, R3.y, T0.z, T0.x VEC_021 0060 010f8403 0f86807c 11 x: MULADD_IEEE T0.x, R3.y, T0.z, T0.x VEC_021 0062 010f8003 2f82807d y: MULADD_IEEE T0.y, R3.x, T0.z, T1.x 0062 010f8003 2f82807d y: MULADD_IEEE T0.y, R3.x, T0.z, T1.x 0064 810f8803 6f828c7c w: MULADD_IEEE T0.w, R3.z, T0.z, T0.w 0064 810f8803 6f828c7c w: MULADD_IEEE T0.w, R3.z, T0.z, T0.w 0066 000f8c7c 0f805210 12 x: CUBE T0.x, T0.w, T0.x 0066 000f8c7c 0f805210 12 x: CUBE T0.x, T0.w, T0.x 0068 008f8c7c 2f805210 y: CUBE T0.y, T0.w, T0.y 0068 008f8c7c 2f805210 y: CUBE T0.y, T0.w, T0.y 0070 018f847c 4f805210 z: CUBE T0.z, T0.y, T0.w 0070 018f847c 4f805210 z: CUBE T0.z, T0.y, T0.w 0072 818f807c 60205210 w: CUBE R1.w, T0.x, T0.w 0072 818f807c 60205210 w: CUBE R1.w, T0.x, T0.w 0074 8000087c 6f806611 13 t: RECIP_IEEE T0.w, |T0.z| 0074 8000087c 6f806611 13 t: RECIP_IEEE T0.w, |T0.z| 0076 018f847c 002280fd 14 x: MULADD_IEEE R1.x, T0.y, T0.w, [0x3fc00000 1.5].x 0076 018f847c 002280fd 14 x: MULADD_IEEE R1.x, T0.y, T0.w, [0x3fc00000 1.5].x 0078 818f807c 202280fd y: MULADD_IEEE R1.y, T0.x, T0.w, [0x3fc00000 1.5].x 0078 818f807c 202280fd y: MULADD_IEEE R1.y, T0.x, T0.w, [0x3fc00000 1.5].x 0080 3fc00000 0080 3fc00000 0006 0000002a 80800400 TEX 2 @84 0006 0000002a 80800400 TEX 2 @84 0084 00011210 f01d1002 0c810000 SAMPLE R2.xyz_, R1.xywx, RID:18, SID:2 CT:NNNN 0084 00011210 f01d1002 0c810000 SAMPLE R2.xyz_, R1.xywx, RID:18, SID:2 CT:NNNN 0088 00001010 f00d1001 fc800000 SAMPLE R1.xyzw, R0.xy__, RID:16, SID:0 CT:NNNN 0088 00001010 f00d1001 fc800000 SAMPLE R1.xyzw, R0.xy__, RID:16, SID:0 CT:NNNN 0008 4000002e a0340000 ALU 14 @92 KC0[CB0:0-15] 0008 4000002e a0340000 ALU 14 @92 KC0[CB0:0-15] 0092 01004801 0f800210 15 x: MUL_IEEE T0.x, R1.z, R2.z 0092 01004801 0f800210 15 x: MUL_IEEE T0.x, R1.z, R2.z 0094 81100801 2f800210 y: MUL_IEEE T0.y, R1.z, KC0[0].z 0094 81100801 2f800210 y: MUL_IEEE T0.y, R1.z, KC0[0].z 0096 0010807c 0fa2847c 16 x: MULADD_IEEE T1.x, T0.x, KC0[4].x, T0.y 0096 0010807c 0fa2847c 16 x: MULADD_IEEE T1.x, T0.x, KC0[4].x, T0.y 0098 00900401 2f800210 y: MUL_IEEE T0.y, R1.y, KC0[0].y 0098 00900401 2f800210 y: MUL_IEEE T0.y, R1.y, KC0[0].y 0100 00004001 4f880210 z: MUL_IEEE T0.z, R1.x, R2.x VEC_120 0100 00004001 4f880210 z: MUL_IEEE T0.z, R1.x, R2.x VEC_120 0102 00804401 6f800210 w: MUL_IEEE T0.w, R1.y, R2.y 0102 00804401 6f800210 w: MUL_IEEE T0.w, R1.y, R2.y 0104 80100001 0f840210 t: MUL_IEEE T0.x, R1.x, KC0[0].x SCL_122 0104 80100001 0f840210 t: MUL_IEEE T0.x, R1.x, KC0[0].x SCL_122 0106 0010887c 0f82807c 17 x: MULADD_IEEE T0.x, T0.z, KC0[4].x, T0.x 0106 0010887c 0f82807c 17 x: MULADD_IEEE T0.x, T0.z, KC0[4].x, T0.x 0108 00108c7c 2f82847c y: MULADD_IEEE T0.y, T0.w, KC0[4].x, T0.y 0108 00108c7c 2f82847c y: MULADD_IEEE T0.y, T0.w, KC0[4].x, T0.y 0110 81000c01 4f880210 z: MUL_IEEE T0.z, R1.w, R0.z VEC_120 0110 81000c01 4f880210 z: MUL_IEEE T0.z, R1.w, R0.z VEC_120 0112 0010a07c 00000210 18 x: MUL_IEEE R0.x, T0.x, KC0[5].x 0112 0010a07c 00000210 18 x: MUL_IEEE R0.x, T0.x, KC0[5].x 0114 0090a47c 20000210 y: MUL_IEEE R0.y, T0.y, KC0[5].y 0114 0090a47c 20000210 y: MUL_IEEE R0.y, T0.y, KC0[5].y 0116 0110a07d 40080210 z: MUL_IEEE R0.z, T1.x, KC0[5].z VEC_120 0116 0110a07d 40080210 z: MUL_IEEE R0.z, T1.x, KC0[5].z VEC_120 0118 8190a87c 60000210 w: MUL_IEEE R0.w, T0.z, KC0[5].w 0118 8190a87c 60000210 w: MUL_IEEE R0.w, T0.z, KC0[5].w 0010 c0000000 94200688 EXPORT_DONE PIXEL 0 R0.xyzw EOP 0010 c0000000 94200688 EXPORT_DONE PIXEL 0 R0.xyzw EOP ===== SHADER_END =============================================================== ===== SHADER_END =============================================================== -------------------------------------------------------------- -------------------------------------------------------------- VERT VERT DCL IN[0] DCL IN[0] DCL IN[1] DCL IN[1] DCL IN[2] DCL IN[2] DCL IN[3] DCL IN[3] DCL OUT[0], POSITION DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[1], GENERIC[0] DCL OUT[2], GENERIC[1] DCL OUT[2], GENERIC[1] DCL OUT[3], GENERIC[2] DCL OUT[3], GENERIC[2] DCL OUT[4], GENERIC[3] DCL OUT[4], GENERIC[3] DCL CONST[0..11] DCL CONST[0..11] DCL TEMP[0..6], LOCAL DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: DP4 TEMP[0].x, CONST[8], IN[0] 0: DP4 TEMP[0].x, CONST[8], IN[0] 1: DP4 TEMP[1].x, CONST[9], IN[0] 1: DP4 TEMP[1].x, CONST[9], IN[0] 2: DP4 TEMP[2].x, CONST[10], IN[0] 2: DP4 TEMP[2].x, CONST[10], IN[0] 3: DP3 TEMP[3].x, CONST[8].xyzz, IN[2].xyzz 3: DP3 TEMP[3].x, CONST[8].xyzz, IN[2].xyzz 4: DP3 TEMP[4].x, CONST[9].xyzz, IN[2].xyzz 4: DP3 TEMP[4].x, CONST[9].xyzz, IN[2].xyzz 5: MOV TEMP[3].y, TEMP[4].xxxx 5: MOV TEMP[3].y, TEMP[4].xxxx 6: DP3 TEMP[4].x, CONST[10].xyzz, IN[2].xyzz 6: DP3 TEMP[4].x, CONST[10].xyzz, IN[2].xyzz 7: MOV TEMP[3].z, TEMP[4].xxxx 7: MOV TEMP[3].z, TEMP[4].xxxx 8: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 8: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz 9: RSQ TEMP[4].x, TEMP[4].xxxx 9: RSQ TEMP[4].x, TEMP[4].xxxx 10: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 10: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx 11: DP3 TEMP[4].x, CONST[8].xyzz, IN[3].xyzz 11: DP3 TEMP[4].x, CONST[8].xyzz, IN[3].xyzz 12: DP3 TEMP[5].x, CONST[9].xyzz, IN[3].xyzz 12: DP3 TEMP[5].x, CONST[9].xyzz, IN[3].xyzz 13: MOV TEMP[4].y, TEMP[5].xxxx 13: MOV TEMP[4].y, TEMP[5].xxxx 14: DP3 TEMP[5].x, CONST[10].xyzz, IN[3].xyzz 14: DP3 TEMP[5].x, CONST[10].xyzz, IN[3].xyzz 15: MOV TEMP[4].z, TEMP[5].xxxx 15: MOV TEMP[4].z, TEMP[5].xxxx 16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz 17: RSQ TEMP[5].x, TEMP[5].xxxx 17: RSQ TEMP[5].x, TEMP[5].xxxx 18: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 18: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 19: MOV TEMP[5].w, IN[1].wwww 19: MOV TEMP[5].w, IN[1].wwww 20: MUL TEMP[6].xy, IN[1].xyyy, CONST[11].xyyy 20: MUL TEMP[6].xy, IN[1].xyyy, CONST[11].xyyy 21: MAD TEMP[5].xy, CONST[11].zwww, IN[1].wwww, TEMP[6].xyyy 21: MAD TEMP[5].xy, CONST[11].zwww, IN[1].wwww, TEMP[6].xyyy 22: MOV TEMP[5].z, IN[2].wwww 22: MOV TEMP[5].z, IN[2].wwww 23: MUL TEMP[0], CONST[0], TEMP[0].xxxx 23: MUL TEMP[0], CONST[0], TEMP[0].xxxx 24: MAD TEMP[0], CONST[1], TEMP[1].xxxx, TEMP[0] 24: MAD TEMP[0], CONST[1], TEMP[1].xxxx, TEMP[0] 25: MAD TEMP[0], CONST[2], TEMP[2].xxxx, TEMP[0] 25: MAD TEMP[0], CONST[2], TEMP[2].xxxx, TEMP[0] 26: ADD TEMP[0], TEMP[0], CONST[3] 26: ADD TEMP[0], TEMP[0], CONST[3] 27: MUL TEMP[1], CONST[4], TEMP[4].xxxx 27: MUL TEMP[1], CONST[4], TEMP[4].xxxx 28: MAD TEMP[1], CONST[5], TEMP[4].yyyy, TEMP[1] 28: MAD TEMP[1], CONST[5], TEMP[4].yyyy, TEMP[1] 29: MAD TEMP[1].xyz, CONST[6], TEMP[4].zzzz, TEMP[1] 29: MAD TEMP[1].xyz, CONST[6], TEMP[4].zzzz, TEMP[1] 30: MOV TEMP[1].xyz, TEMP[1].xyzx 30: MOV TEMP[1].xyz, TEMP[1].xyzx 31: MUL TEMP[6].xyz, TEMP[3].zxyy, TEMP[4].yzxx 31: MUL TEMP[6].xyz, TEMP[3].zxyy, TEMP[4].yzxx 32: MAD TEMP[4].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[6].xyzz 32: MAD TEMP[4].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[6].xyzz 33: MUL TEMP[2].xyz, TEMP[4].xyzz, IN[3].wwww 33: MUL TEMP[2].xyz, TEMP[4].xyzz, IN[3].wwww 34: MUL TEMP[4], CONST[4], TEMP[2].xxxx 34: MUL TEMP[4], CONST[4], TEMP[2].xxxx 35: MAD TEMP[4], CONST[5], TEMP[2].yyyy, TEMP[4] 35: MAD TEMP[4], CONST[5], TEMP[2].yyyy, TEMP[4] 36: MAD TEMP[2].xyz, CONST[6], TEMP[2].zzzz, TEMP[4] 36: MAD TEMP[2].xyz, CONST[6], TEMP[2].zzzz, TEMP[4] 37: MOV TEMP[2].xyz, TEMP[2].xyzx 37: MOV TEMP[2].xyz, TEMP[2].xyzx 38: MUL TEMP[4], CONST[4], TEMP[3].xxxx 38: MUL TEMP[4], CONST[4], TEMP[3].xxxx 39: MAD TEMP[4], CONST[5], TEMP[3].yyyy, TEMP[4] 39: MAD TEMP[4], CONST[5], TEMP[3].yyyy, TEMP[4] 40: MAD TEMP[3].xyz, CONST[6], TEMP[3].zzzz, TEMP[4] 40: MAD TEMP[3].xyz, CONST[6], TEMP[3].zzzz, TEMP[4] 41: MOV TEMP[3].xyz, TEMP[3].xyzx 41: MOV TEMP[3].xyz, TEMP[3].xyzx 42: MOV OUT[1], TEMP[5] 42: MOV OUT[1], TEMP[5] 43: MOV OUT[2], TEMP[1] 43: MOV OUT[2], TEMP[1] 44: MOV OUT[3], TEMP[2] 44: MOV OUT[3], TEMP[2] 45: MOV OUT[4], TEMP[3] 45: MOV OUT[4], TEMP[3] 46: MOV OUT[0], TEMP[0] 46: MOV OUT[0], TEMP[0] 47: END 47: END ; ModuleID = 'tgsi' ; ModuleID = 'tgsi' define void @main() #0 { | define void @main(<4 x float> inreg, <4 x float> inreg, <4 x float> inreg, <4 x float> inreg, <4 x float> in main_body: main_body: %0 = call float @llvm.R600.load.input(i32 4) | %5 = extractelement <4 x float> %1, i32 0 %1 = call float @llvm.R600.load.input(i32 5) | %6 = extractelement <4 x float> %1, i32 1 %2 = call float @llvm.R600.load.input(i32 6) | %7 = extractelement <4 x float> %1, i32 2 %3 = call float @llvm.R600.load.input(i32 7) | %8 = extractelement <4 x float> %1, i32 3 %4 = call float @llvm.R600.load.input(i32 8) | %9 = extractelement <4 x float> %2, i32 0 %5 = call float @llvm.R600.load.input(i32 9) | %10 = extractelement <4 x float> %2, i32 1 %6 = call float @llvm.R600.load.input(i32 10) | %11 = extractelement <4 x float> %2, i32 3 %7 = call float @llvm.R600.load.input(i32 11) | %12 = extractelement <4 x float> %3, i32 0 %8 = call float @llvm.R600.load.input(i32 12) | %13 = extractelement <4 x float> %3, i32 1 %9 = call float @llvm.R600.load.input(i32 13) | %14 = extractelement <4 x float> %3, i32 2 %10 = call float @llvm.R600.load.input(i32 14) | %15 = extractelement <4 x float> %3, i32 3 %11 = call float @llvm.R600.load.input(i32 15) | %16 = extractelement <4 x float> %4, i32 0 %12 = call float @llvm.R600.load.input(i32 16) | %17 = extractelement <4 x float> %4, i32 1 %13 = call float @llvm.R600.load.input(i32 17) | %18 = extractelement <4 x float> %4, i32 2 %14 = call float @llvm.R600.load.input(i32 18) | %19 = extractelement <4 x float> %4, i32 3 %15 = call float @llvm.R600.load.input(i32 19) < %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) < %17 = extractelement <4 x float> %16, i32 0 < %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) < %19 = extractelement <4 x float> %18, i32 1 < %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %21 = extractelement <4 x float> %20, i32 2 | %21 = extractelement <4 x float> %20, i32 0 %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %23 = extractelement <4 x float> %22, i32 3 | %23 = extractelement <4 x float> %22, i32 1 %24 = insertelement <4 x float> undef, float %17, i32 0 | %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %25 = insertelement <4 x float> %24, float %19, i32 1 | %25 = extractelement <4 x float> %24, i32 2 %26 = insertelement <4 x float> %25, float %21, i32 2 | %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %27 = insertelement <4 x float> %26, float %23, i32 3 | %27 = extractelement <4 x float> %26, i32 3 %28 = insertelement <4 x float> undef, float %0, i32 0 | %28 = insertelement <4 x float> undef, float %21, i32 0 %29 = insertelement <4 x float> %28, float %1, i32 1 | %29 = insertelement <4 x float> %28, float %23, i32 1 %30 = insertelement <4 x float> %29, float %2, i32 2 | %30 = insertelement <4 x float> %29, float %25, i32 2 %31 = insertelement <4 x float> %30, float %3, i32 3 | %31 = insertelement <4 x float> %30, float %27, i32 3 %32 = call float @llvm.AMDGPU.dp4(<4 x float> %27, <4 x float> %31) | %32 = insertelement <4 x float> undef, float %5, i32 0 %33 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) | %33 = insertelement <4 x float> %32, float %6, i32 1 %34 = extractelement <4 x float> %33, i32 0 | %34 = insertelement <4 x float> %33, float %7, i32 2 %35 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) | %35 = insertelement <4 x float> %34, float %8, i32 3 %36 = extractelement <4 x float> %35, i32 1 | %36 = call float @llvm.AMDGPU.dp4(<4 x float> %31, <4 x float> %35) %37 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %37 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %38 = extractelement <4 x float> %37, i32 2 | %38 = extractelement <4 x float> %37, i32 0 %39 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %39 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %40 = extractelement <4 x float> %39, i32 3 | %40 = extractelement <4 x float> %39, i32 1 %41 = insertelement <4 x float> undef, float %34, i32 0 | %41 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %42 = insertelement <4 x float> %41, float %36, i32 1 | %42 = extractelement <4 x float> %41, i32 2 %43 = insertelement <4 x float> %42, float %38, i32 2 | %43 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %44 = insertelement <4 x float> %43, float %40, i32 3 | %44 = extractelement <4 x float> %43, i32 3 %45 = insertelement <4 x float> undef, float %0, i32 0 | %45 = insertelement <4 x float> undef, float %38, i32 0 %46 = insertelement <4 x float> %45, float %1, i32 1 | %46 = insertelement <4 x float> %45, float %40, i32 1 %47 = insertelement <4 x float> %46, float %2, i32 2 | %47 = insertelement <4 x float> %46, float %42, i32 2 %48 = insertelement <4 x float> %47, float %3, i32 3 | %48 = insertelement <4 x float> %47, float %44, i32 3 %49 = call float @llvm.AMDGPU.dp4(<4 x float> %44, <4 x float> %48) | %49 = insertelement <4 x float> undef, float %5, i32 0 %50 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 | %50 = insertelement <4 x float> %49, float %6, i32 1 %51 = extractelement <4 x float> %50, i32 0 | %51 = insertelement <4 x float> %50, float %7, i32 2 %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 | %52 = insertelement <4 x float> %51, float %8, i32 3 %53 = extractelement <4 x float> %52, i32 1 | %53 = call float @llvm.AMDGPU.dp4(<4 x float> %48, <4 x float> %52) %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 %55 = extractelement <4 x float> %54, i32 2 | %55 = extractelement <4 x float> %54, i32 0 %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 %57 = extractelement <4 x float> %56, i32 3 | %57 = extractelement <4 x float> %56, i32 1 %58 = insertelement <4 x float> undef, float %51, i32 0 | %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 %59 = insertelement <4 x float> %58, float %53, i32 1 | %59 = extractelement <4 x float> %58, i32 2 %60 = insertelement <4 x float> %59, float %55, i32 2 | %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 %61 = insertelement <4 x float> %60, float %57, i32 3 | %61 = extractelement <4 x float> %60, i32 3 %62 = insertelement <4 x float> undef, float %0, i32 0 | %62 = insertelement <4 x float> undef, float %55, i32 0 %63 = insertelement <4 x float> %62, float %1, i32 1 | %63 = insertelement <4 x float> %62, float %57, i32 1 %64 = insertelement <4 x float> %63, float %2, i32 2 | %64 = insertelement <4 x float> %63, float %59, i32 2 %65 = insertelement <4 x float> %64, float %3, i32 3 | %65 = insertelement <4 x float> %64, float %61, i32 3 %66 = call float @llvm.AMDGPU.dp4(<4 x float> %61, <4 x float> %65) | %66 = insertelement <4 x float> undef, float %5, i32 0 %67 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) | %67 = insertelement <4 x float> %66, float %6, i32 1 %68 = extractelement <4 x float> %67, i32 0 | %68 = insertelement <4 x float> %67, float %7, i32 2 %69 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) | %69 = insertelement <4 x float> %68, float %8, i32 3 %70 = extractelement <4 x float> %69, i32 1 | %70 = call float @llvm.AMDGPU.dp4(<4 x float> %65, <4 x float> %69) %71 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %71 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %72 = extractelement <4 x float> %71, i32 2 | %72 = extractelement <4 x float> %71, i32 0 %73 = insertelement <4 x float> undef, float %68, i32 0 | %73 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %74 = insertelement <4 x float> %73, float %70, i32 1 | %74 = extractelement <4 x float> %73, i32 1 %75 = insertelement <4 x float> %74, float %72, i32 2 | %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %76 = insertelement <4 x float> %75, float 0.000000e+00, i32 3 | %76 = extractelement <4 x float> %75, i32 2 %77 = insertelement <4 x float> undef, float %8, i32 0 | %77 = insertelement <4 x float> undef, float %72, i32 0 %78 = insertelement <4 x float> %77, float %9, i32 1 | %78 = insertelement <4 x float> %77, float %74, i32 1 %79 = insertelement <4 x float> %78, float %10, i32 2 | %79 = insertelement <4 x float> %78, float %76, i32 2 %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3 %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3 %81 = call float @llvm.AMDGPU.dp4(<4 x float> %76, <4 x float> %80) | %81 = insertelement <4 x float> undef, float %12, i32 0 %82 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) | %82 = insertelement <4 x float> %81, float %13, i32 1 %83 = extractelement <4 x float> %82, i32 0 | %83 = insertelement <4 x float> %82, float %14, i32 2 %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) | %84 = insertelement <4 x float> %83, float 0.000000e+00, i32 3 %85 = extractelement <4 x float> %84, i32 1 | %85 = call float @llvm.AMDGPU.dp4(<4 x float> %80, <4 x float> %84) %86 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %86 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %87 = extractelement <4 x float> %86, i32 2 | %87 = extractelement <4 x float> %86, i32 0 %88 = insertelement <4 x float> undef, float %83, i32 0 | %88 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %89 = insertelement <4 x float> %88, float %85, i32 1 | %89 = extractelement <4 x float> %88, i32 1 %90 = insertelement <4 x float> %89, float %87, i32 2 | %90 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3 | %91 = extractelement <4 x float> %90, i32 2 %92 = insertelement <4 x float> undef, float %8, i32 0 | %92 = insertelement <4 x float> undef, float %87, i32 0 %93 = insertelement <4 x float> %92, float %9, i32 1 | %93 = insertelement <4 x float> %92, float %89, i32 1 %94 = insertelement <4 x float> %93, float %10, i32 2 | %94 = insertelement <4 x float> %93, float %91, i32 2 %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) | %96 = insertelement <4 x float> undef, float %12, i32 0 %97 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 | %97 = insertelement <4 x float> %96, float %13, i32 1 %98 = extractelement <4 x float> %97, i32 0 | %98 = insertelement <4 x float> %97, float %14, i32 2 %99 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 | %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 3 %100 = extractelement <4 x float> %99, i32 1 | %100 = call float @llvm.AMDGPU.dp4(<4 x float> %95, <4 x float> %99) %101 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %101 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %102 = extractelement <4 x float> %101, i32 2 | %102 = extractelement <4 x float> %101, i32 0 %103 = insertelement <4 x float> undef, float %98, i32 0 | %103 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %104 = insertelement <4 x float> %103, float %100, i32 1 | %104 = extractelement <4 x float> %103, i32 1 %105 = insertelement <4 x float> %104, float %102, i32 2 | %105 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 3 | %106 = extractelement <4 x float> %105, i32 2 %107 = insertelement <4 x float> undef, float %8, i32 0 | %107 = insertelement <4 x float> undef, float %102, i32 0 %108 = insertelement <4 x float> %107, float %9, i32 1 | %108 = insertelement <4 x float> %107, float %104, i32 1 %109 = insertelement <4 x float> %108, float %10, i32 2 | %109 = insertelement <4 x float> %108, float %106, i32 2 %110 = insertelement <4 x float> %109, float 0.000000e+00, i32 3 %110 = insertelement <4 x float> %109, float 0.000000e+00, i32 3 %111 = call float @llvm.AMDGPU.dp4(<4 x float> %106, <4 x float> %110) | %111 = insertelement <4 x float> undef, float %12, i32 0 %112 = insertelement <4 x float> undef, float %81, i32 0 | %112 = insertelement <4 x float> %111, float %13, i32 1 %113 = insertelement <4 x float> %112, float %96, i32 1 | %113 = insertelement <4 x float> %112, float %14, i32 2 %114 = insertelement <4 x float> %113, float %111, i32 2 | %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 3 %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 3 | %115 = call float @llvm.AMDGPU.dp4(<4 x float> %110, <4 x float> %114) %116 = insertelement <4 x float> undef, float %81, i32 0 | %116 = insertelement <4 x float> undef, float %85, i32 0 %117 = insertelement <4 x float> %116, float %96, i32 1 | %117 = insertelement <4 x float> %116, float %100, i32 1 %118 = insertelement <4 x float> %117, float %111, i32 2 | %118 = insertelement <4 x float> %117, float %115, i32 2 %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3 %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3 %120 = call float @llvm.AMDGPU.dp4(<4 x float> %115, <4 x float> %119) | %120 = insertelement <4 x float> undef, float %85, i32 0 %121 = call float @llvm.AMDGPU.rsq(float %120) | %121 = insertelement <4 x float> %120, float %100, i32 1 %122 = fmul float %81, %121 | %122 = insertelement <4 x float> %121, float %115, i32 2 %123 = fmul float %96, %121 | %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 %124 = fmul float %111, %121 | %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123) %125 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8 | %125 = call float @llvm.AMDGPU.rsq(float %124) %126 = extractelement <4 x float> %125, i32 0 | %126 = fmul float %85, %125 %127 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8 | %127 = fmul float %100, %125 %128 = extractelement <4 x float> %127, i32 1 | %128 = fmul float %115, %125 %129 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8 %129 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8 %130 = extractelement <4 x float> %129, i32 2 | %130 = extractelement <4 x float> %129, i32 0 %131 = insertelement <4 x float> undef, float %126, i32 0 | %131 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8 %132 = insertelement <4 x float> %131, float %128, i32 1 | %132 = extractelement <4 x float> %131, i32 1 %133 = insertelement <4 x float> %132, float %130, i32 2 | %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8 %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 3 | %134 = extractelement <4 x float> %133, i32 2 %135 = insertelement <4 x float> undef, float %12, i32 0 | %135 = insertelement <4 x float> undef, float %130, i32 0 %136 = insertelement <4 x float> %135, float %13, i32 1 | %136 = insertelement <4 x float> %135, float %132, i32 1 %137 = insertelement <4 x float> %136, float %14, i32 2 | %137 = insertelement <4 x float> %136, float %134, i32 2 %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 %139 = call float @llvm.AMDGPU.dp4(<4 x float> %134, <4 x float> %138) | %139 = insertelement <4 x float> undef, float %16, i32 0 %140 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9 | %140 = insertelement <4 x float> %139, float %17, i32 1 %141 = extractelement <4 x float> %140, i32 0 | %141 = insertelement <4 x float> %140, float %18, i32 2 %142 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9 | %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 3 %143 = extractelement <4 x float> %142, i32 1 | %143 = call float @llvm.AMDGPU.dp4(<4 x float> %138, <4 x float> %142) %144 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9 %144 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9 %145 = extractelement <4 x float> %144, i32 2 | %145 = extractelement <4 x float> %144, i32 0 %146 = insertelement <4 x float> undef, float %141, i32 0 | %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9 %147 = insertelement <4 x float> %146, float %143, i32 1 | %147 = extractelement <4 x float> %146, i32 1 %148 = insertelement <4 x float> %147, float %145, i32 2 | %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9 %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 3 | %149 = extractelement <4 x float> %148, i32 2 %150 = insertelement <4 x float> undef, float %12, i32 0 | %150 = insertelement <4 x float> undef, float %145, i32 0 %151 = insertelement <4 x float> %150, float %13, i32 1 | %151 = insertelement <4 x float> %150, float %147, i32 1 %152 = insertelement <4 x float> %151, float %14, i32 2 | %152 = insertelement <4 x float> %151, float %149, i32 2 %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 3 %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 3 %154 = call float @llvm.AMDGPU.dp4(<4 x float> %149, <4 x float> %153) | %154 = insertelement <4 x float> undef, float %16, i32 0 %155 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 | %155 = insertelement <4 x float> %154, float %17, i32 1 %156 = extractelement <4 x float> %155, i32 0 | %156 = insertelement <4 x float> %155, float %18, i32 2 %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 | %157 = insertelement <4 x float> %156, float 0.000000e+00, i32 3 %158 = extractelement <4 x float> %157, i32 1 | %158 = call float @llvm.AMDGPU.dp4(<4 x float> %153, <4 x float> %157) %159 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %159 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %160 = extractelement <4 x float> %159, i32 2 | %160 = extractelement <4 x float> %159, i32 0 %161 = insertelement <4 x float> undef, float %156, i32 0 | %161 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %162 = insertelement <4 x float> %161, float %158, i32 1 | %162 = extractelement <4 x float> %161, i32 1 %163 = insertelement <4 x float> %162, float %160, i32 2 | %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 3 | %164 = extractelement <4 x float> %163, i32 2 %165 = insertelement <4 x float> undef, float %12, i32 0 | %165 = insertelement <4 x float> undef, float %160, i32 0 %166 = insertelement <4 x float> %165, float %13, i32 1 | %166 = insertelement <4 x float> %165, float %162, i32 1 %167 = insertelement <4 x float> %166, float %14, i32 2 | %167 = insertelement <4 x float> %166, float %164, i32 2 %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 3 %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 3 %169 = call float @llvm.AMDGPU.dp4(<4 x float> %164, <4 x float> %168) | %169 = insertelement <4 x float> undef, float %16, i32 0 %170 = insertelement <4 x float> undef, float %139, i32 0 | %170 = insertelement <4 x float> %169, float %17, i32 1 %171 = insertelement <4 x float> %170, float %154, i32 1 | %171 = insertelement <4 x float> %170, float %18, i32 2 %172 = insertelement <4 x float> %171, float %169, i32 2 | %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 3 %173 = insertelement <4 x float> %172, float 0.000000e+00, i32 3 | %173 = call float @llvm.AMDGPU.dp4(<4 x float> %168, <4 x float> %172) %174 = insertelement <4 x float> undef, float %139, i32 0 | %174 = insertelement <4 x float> undef, float %143, i32 0 %175 = insertelement <4 x float> %174, float %154, i32 1 | %175 = insertelement <4 x float> %174, float %158, i32 1 %176 = insertelement <4 x float> %175, float %169, i32 2 | %176 = insertelement <4 x float> %175, float %173, i32 2 %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 3 %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 3 %178 = call float @llvm.AMDGPU.dp4(<4 x float> %173, <4 x float> %177) | %178 = insertelement <4 x float> undef, float %143, i32 0 %179 = call float @llvm.AMDGPU.rsq(float %178) | %179 = insertelement <4 x float> %178, float %158, i32 1 %180 = fmul float %139, %179 | %180 = insertelement <4 x float> %179, float %173, i32 2 %181 = fmul float %154, %179 | %181 = insertelement <4 x float> %180, float 0.000000e+00, i32 3 %182 = fmul float %169, %179 | %182 = call float @llvm.AMDGPU.dp4(<4 x float> %177, <4 x float> %181) %183 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 | %183 = call float @llvm.AMDGPU.rsq(float %182) %184 = extractelement <4 x float> %183, i32 0 | %184 = fmul float %143, %183 %185 = fmul float %4, %184 | %185 = fmul float %158, %183 %186 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 | %186 = fmul float %173, %183 %187 = extractelement <4 x float> %186, i32 1 | %187 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %188 = fmul float %5, %187 | %188 = extractelement <4 x float> %187, i32 0 %189 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 | %189 = fmul float %9, %188 %190 = extractelement <4 x float> %189, i32 2 | %190 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %191 = fmul float %190, %7 | %191 = extractelement <4 x float> %190, i32 1 %192 = fadd float %191, %185 | %192 = fmul float %10, %191 %193 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %193 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %194 = extractelement <4 x float> %193, i32 3 | %194 = extractelement <4 x float> %193, i32 2 %195 = fmul float %194, %7 | %195 = fmul float %194, %11 %196 = fadd float %195, %188 | %196 = fadd float %195, %189 %197 = load <4 x float> addrspace(8)* null | %197 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %198 = extractelement <4 x float> %197, i32 0 | %198 = extractelement <4 x float> %197, i32 3 %199 = fmul float %198, %32 | %199 = fmul float %198, %11 %200 = load <4 x float> addrspace(8)* null | %200 = fadd float %199, %192 %201 = extractelement <4 x float> %200, i32 1 | %201 = load <4 x float> addrspace(8)* null %202 = fmul float %201, %32 | %202 = extractelement <4 x float> %201, i32 0 %203 = load <4 x float> addrspace(8)* null | %203 = fmul float %202, %36 %204 = extractelement <4 x float> %203, i32 2 | %204 = load <4 x float> addrspace(8)* null %205 = fmul float %204, %32 | %205 = extractelement <4 x float> %204, i32 1 %206 = load <4 x float> addrspace(8)* null | %206 = fmul float %205, %36 %207 = extractelement <4 x float> %206, i32 3 | %207 = load <4 x float> addrspace(8)* null %208 = fmul float %207, %32 | %208 = extractelement <4 x float> %207, i32 2 %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 | %209 = fmul float %208, %36 %210 = extractelement <4 x float> %209, i32 0 | %210 = load <4 x float> addrspace(8)* null %211 = fmul float %210, %49 | %211 = extractelement <4 x float> %210, i32 3 %212 = fadd float %211, %199 | %212 = fmul float %211, %36 %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %214 = extractelement <4 x float> %213, i32 1 | %214 = extractelement <4 x float> %213, i32 0 %215 = fmul float %214, %49 | %215 = fmul float %214, %53 %216 = fadd float %215, %202 | %216 = fadd float %215, %203 %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %218 = extractelement <4 x float> %217, i32 2 | %218 = extractelement <4 x float> %217, i32 1 %219 = fmul float %218, %49 | %219 = fmul float %218, %53 %220 = fadd float %219, %205 | %220 = fadd float %219, %206 %221 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %221 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %222 = extractelement <4 x float> %221, i32 3 | %222 = extractelement <4 x float> %221, i32 2 %223 = fmul float %222, %49 | %223 = fmul float %222, %53 %224 = fadd float %223, %208 | %224 = fadd float %223, %209 %225 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 | %225 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 %226 = extractelement <4 x float> %225, i32 0 | %226 = extractelement <4 x float> %225, i32 3 %227 = fmul float %226, %66 | %227 = fmul float %226, %53 %228 = fadd float %227, %212 %228 = fadd float %227, %212 %229 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 %229 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 %230 = extractelement <4 x float> %229, i32 1 | %230 = extractelement <4 x float> %229, i32 0 %231 = fmul float %230, %66 | %231 = fmul float %230, %70 %232 = fadd float %231, %216 %232 = fadd float %231, %216 %233 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 %233 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 %234 = extractelement <4 x float> %233, i32 2 | %234 = extractelement <4 x float> %233, i32 1 %235 = fmul float %234, %66 | %235 = fmul float %234, %70 %236 = fadd float %235, %220 %236 = fadd float %235, %220 %237 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 %237 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 %238 = extractelement <4 x float> %237, i32 3 | %238 = extractelement <4 x float> %237, i32 2 %239 = fmul float %238, %66 | %239 = fmul float %238, %70 %240 = fadd float %239, %224 %240 = fadd float %239, %224 %241 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 | %241 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 %242 = extractelement <4 x float> %241, i32 0 | %242 = extractelement <4 x float> %241, i32 3 %243 = fadd float %228, %242 | %243 = fmul float %242, %70 %244 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 | %244 = fadd float %243, %228 %245 = extractelement <4 x float> %244, i32 1 | %245 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 %246 = fadd float %232, %245 | %246 = extractelement <4 x float> %245, i32 0 %247 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 | %247 = fadd float %232, %246 %248 = extractelement <4 x float> %247, i32 2 | %248 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 %249 = fadd float %236, %248 | %249 = extractelement <4 x float> %248, i32 1 %250 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 | %250 = fadd float %236, %249 %251 = extractelement <4 x float> %250, i32 3 | %251 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 %252 = fadd float %240, %251 | %252 = extractelement <4 x float> %251, i32 2 %253 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %253 = fadd float %240, %252 %254 = extractelement <4 x float> %253, i32 0 | %254 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 %255 = fmul float %254, %180 | %255 = extractelement <4 x float> %254, i32 3 %256 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %256 = fadd float %244, %255 %257 = extractelement <4 x float> %256, i32 1 | %257 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %258 = fmul float %257, %180 | %258 = extractelement <4 x float> %257, i32 0 %259 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %259 = fmul float %258, %184 %260 = extractelement <4 x float> %259, i32 2 | %260 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %261 = fmul float %260, %180 | %261 = extractelement <4 x float> %260, i32 1 %262 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %262 = fmul float %261, %184 %263 = extractelement <4 x float> %262, i32 3 | %263 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %264 = fmul float %263, %180 | %264 = extractelement <4 x float> %263, i32 2 %265 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 | %265 = fmul float %264, %184 %266 = extractelement <4 x float> %265, i32 0 | %266 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %267 = fmul float %266, %181 | %267 = extractelement <4 x float> %266, i32 3 %268 = fadd float %267, %255 | %268 = fmul float %267, %184 %269 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %269 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %270 = extractelement <4 x float> %269, i32 1 | %270 = extractelement <4 x float> %269, i32 0 %271 = fmul float %270, %181 | %271 = fmul float %270, %185 %272 = fadd float %271, %258 | %272 = fadd float %271, %259 %273 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %273 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %274 = extractelement <4 x float> %273, i32 2 | %274 = extractelement <4 x float> %273, i32 1 %275 = fmul float %274, %181 | %275 = fmul float %274, %185 %276 = fadd float %275, %261 | %276 = fadd float %275, %262 %277 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %277 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %278 = extractelement <4 x float> %277, i32 3 | %278 = extractelement <4 x float> %277, i32 2 %279 = fmul float %278, %181 | %279 = fmul float %278, %185 %280 = fadd float %279, %264 | %280 = fadd float %279, %265 %281 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 | %281 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %282 = extractelement <4 x float> %281, i32 0 | %282 = extractelement <4 x float> %281, i32 3 %283 = fmul float %282, %182 | %283 = fmul float %282, %185 %284 = fadd float %283, %268 %284 = fadd float %283, %268 %285 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %285 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %286 = extractelement <4 x float> %285, i32 1 | %286 = extractelement <4 x float> %285, i32 0 %287 = fmul float %286, %182 | %287 = fmul float %286, %186 %288 = fadd float %287, %272 %288 = fadd float %287, %272 %289 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %289 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %290 = extractelement <4 x float> %289, i32 2 | %290 = extractelement <4 x float> %289, i32 1 %291 = fmul float %290, %182 | %291 = fmul float %290, %186 %292 = fadd float %291, %276 %292 = fadd float %291, %276 %293 = fmul float %124, %181 | %293 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %294 = fmul float %122, %182 | %294 = extractelement <4 x float> %293, i32 2 %295 = fmul float %123, %180 | %295 = fmul float %294, %186 %296 = fsub float -0.000000e+00, %293 | %296 = fadd float %295, %280 %297 = fmul float %123, %182 | %297 = fmul float %128, %185 %298 = fadd float %297, %296 | %298 = fmul float %126, %186 %299 = fsub float -0.000000e+00, %294 | %299 = fmul float %127, %184 %300 = fmul float %124, %180 | %300 = fsub float -0.000000e+00, %297 %301 = fadd float %300, %299 | %301 = fmul float %127, %186 %302 = fsub float -0.000000e+00, %295 | %302 = fadd float %301, %300 %303 = fmul float %122, %181 | %303 = fsub float -0.000000e+00, %298 %304 = fadd float %303, %302 | %304 = fmul float %128, %184 %305 = fmul float %298, %15 | %305 = fadd float %304, %303 %306 = fmul float %301, %15 | %306 = fsub float -0.000000e+00, %299 %307 = fmul float %304, %15 | %307 = fmul float %126, %185 %308 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %308 = fadd float %307, %306 %309 = extractelement <4 x float> %308, i32 0 | %309 = fmul float %302, %19 %310 = fmul float %309, %305 | %310 = fmul float %305, %19 %311 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %311 = fmul float %308, %19 %312 = extractelement <4 x float> %311, i32 1 | %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %313 = fmul float %312, %305 | %313 = extractelement <4 x float> %312, i32 0 %314 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %314 = fmul float %313, %309 %315 = extractelement <4 x float> %314, i32 2 | %315 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %316 = fmul float %315, %305 | %316 = extractelement <4 x float> %315, i32 1 %317 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 | %317 = fmul float %316, %309 %318 = extractelement <4 x float> %317, i32 0 | %318 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %319 = fmul float %318, %306 | %319 = extractelement <4 x float> %318, i32 2 %320 = fadd float %319, %310 | %320 = fmul float %319, %309 %321 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %321 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %322 = extractelement <4 x float> %321, i32 1 | %322 = extractelement <4 x float> %321, i32 0 %323 = fmul float %322, %306 | %323 = fmul float %322, %310 %324 = fadd float %323, %313 | %324 = fadd float %323, %314 %325 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %325 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %326 = extractelement <4 x float> %325, i32 2 | %326 = extractelement <4 x float> %325, i32 1 %327 = fmul float %326, %306 | %327 = fmul float %326, %310 %328 = fadd float %327, %316 | %328 = fadd float %327, %317 %329 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 | %329 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %330 = extractelement <4 x float> %329, i32 0 | %330 = extractelement <4 x float> %329, i32 2 %331 = fmul float %330, %307 | %331 = fmul float %330, %310 %332 = fadd float %331, %320 %332 = fadd float %331, %320 %333 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %333 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %334 = extractelement <4 x float> %333, i32 1 | %334 = extractelement <4 x float> %333, i32 0 %335 = fmul float %334, %307 | %335 = fmul float %334, %311 %336 = fadd float %335, %324 %336 = fadd float %335, %324 %337 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %337 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %338 = extractelement <4 x float> %337, i32 2 | %338 = extractelement <4 x float> %337, i32 1 %339 = fmul float %338, %307 | %339 = fmul float %338, %311 %340 = fadd float %339, %328 %340 = fadd float %339, %328 %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %342 = extractelement <4 x float> %341, i32 0 | %342 = extractelement <4 x float> %341, i32 2 %343 = fmul float %342, %122 | %343 = fmul float %342, %311 %344 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %344 = fadd float %343, %332 %345 = extractelement <4 x float> %344, i32 1 | %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %346 = fmul float %345, %122 | %346 = extractelement <4 x float> %345, i32 0 %347 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 | %347 = fmul float %346, %126 %348 = extractelement <4 x float> %347, i32 2 | %348 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %349 = fmul float %348, %122 | %349 = extractelement <4 x float> %348, i32 1 %350 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 | %350 = fmul float %349, %126 %351 = extractelement <4 x float> %350, i32 0 | %351 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 %352 = fmul float %351, %123 | %352 = extractelement <4 x float> %351, i32 2 %353 = fadd float %352, %343 | %353 = fmul float %352, %126 %354 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %354 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %355 = extractelement <4 x float> %354, i32 1 | %355 = extractelement <4 x float> %354, i32 0 %356 = fmul float %355, %123 | %356 = fmul float %355, %127 %357 = fadd float %356, %346 | %357 = fadd float %356, %347 %358 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %358 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %359 = extractelement <4 x float> %358, i32 2 | %359 = extractelement <4 x float> %358, i32 1 %360 = fmul float %359, %123 | %360 = fmul float %359, %127 %361 = fadd float %360, %349 | %361 = fadd float %360, %350 %362 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 | %362 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 %363 = extractelement <4 x float> %362, i32 0 | %363 = extractelement <4 x float> %362, i32 2 %364 = fmul float %363, %124 | %364 = fmul float %363, %127 %365 = fadd float %364, %353 %365 = fadd float %364, %353 %366 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %366 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %367 = extractelement <4 x float> %366, i32 1 | %367 = extractelement <4 x float> %366, i32 0 %368 = fmul float %367, %124 | %368 = fmul float %367, %128 %369 = fadd float %368, %357 %369 = fadd float %368, %357 %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %371 = extractelement <4 x float> %370, i32 2 | %371 = extractelement <4 x float> %370, i32 1 %372 = fmul float %371, %124 | %372 = fmul float %371, %128 %373 = fadd float %372, %361 %373 = fadd float %372, %361 %374 = insertelement <4 x float> undef, float %243, i32 0 | %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 %375 = insertelement <4 x float> %374, float %246, i32 1 | %375 = extractelement <4 x float> %374, i32 2 %376 = insertelement <4 x float> %375, float %249, i32 2 | %376 = fmul float %375, %128 %377 = insertelement <4 x float> %376, float %252, i32 3 | %377 = fadd float %376, %365 call void @llvm.R600.store.swizzle(<4 x float> %377, i32 60, i32 1) | %378 = insertelement <4 x float> undef, float %247, i32 0 %378 = insertelement <4 x float> undef, float %192, i32 0 | %379 = insertelement <4 x float> %378, float %250, i32 1 %379 = insertelement <4 x float> %378, float %196, i32 1 | %380 = insertelement <4 x float> %379, float %253, i32 2 %380 = insertelement <4 x float> %379, float %11, i32 2 | %381 = insertelement <4 x float> %380, float %256, i32 3 %381 = insertelement <4 x float> %380, float %7, i32 3 | call void @llvm.R600.store.swizzle(<4 x float> %381, i32 60, i32 1) call void @llvm.R600.store.swizzle(<4 x float> %381, i32 0, i32 2) | %382 = insertelement <4 x float> undef, float %196, i32 0 %382 = insertelement <4 x float> undef, float %284, i32 0 | %383 = insertelement <4 x float> %382, float %200, i32 1 %383 = insertelement <4 x float> %382, float %288, i32 1 | %384 = insertelement <4 x float> %383, float %15, i32 2 %384 = insertelement <4 x float> %383, float %292, i32 2 | %385 = insertelement <4 x float> %384, float %11, i32 3 %385 = insertelement <4 x float> %384, float %280, i32 3 | call void @llvm.R600.store.swizzle(<4 x float> %385, i32 0, i32 2) call void @llvm.R600.store.swizzle(<4 x float> %385, i32 1, i32 2) | %386 = insertelement <4 x float> undef, float %288, i32 0 %386 = insertelement <4 x float> undef, float %332, i32 0 | %387 = insertelement <4 x float> %386, float %292, i32 1 %387 = insertelement <4 x float> %386, float %336, i32 1 | %388 = insertelement <4 x float> %387, float %296, i32 2 %388 = insertelement <4 x float> %387, float %340, i32 2 | %389 = insertelement <4 x float> %388, float %284, i32 3 %389 = insertelement <4 x float> %388, float 0.000000e+00, i32 3 | call void @llvm.R600.store.swizzle(<4 x float> %389, i32 1, i32 2) call void @llvm.R600.store.swizzle(<4 x float> %389, i32 2, i32 2) | %390 = insertelement <4 x float> undef, float %336, i32 0 %390 = insertelement <4 x float> undef, float %365, i32 0 | %391 = insertelement <4 x float> %390, float %340, i32 1 %391 = insertelement <4 x float> %390, float %369, i32 1 | %392 = insertelement <4 x float> %391, float %344, i32 2 %392 = insertelement <4 x float> %391, float %373, i32 2 < %393 = insertelement <4 x float> %392, float 0.000000e+00, i32 3 %393 = insertelement <4 x float> %392, float 0.000000e+00, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %393, i32 3, i32 2) | call void @llvm.R600.store.swizzle(<4 x float> %393, i32 2, i32 2) > %394 = insertelement <4 x float> undef, float %369, i32 0 > %395 = insertelement <4 x float> %394, float %373, i32 1 > %396 = insertelement <4 x float> %395, float %377, i32 2 > %397 = insertelement <4 x float> %396, float 0.000000e+00, i32 3 > call void @llvm.R600.store.swizzle(<4 x float> %397, i32 3, i32 2) ret void ret void } } ; Function Attrs: readnone ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 < < ; Function Attrs: readnone < declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readnone ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #1 declare float @llvm.AMDGPU.rsq(float) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness < Function Live Ins: %T4_W, %T4_Z, %T4_Y, %T4_X, %T3_W, %T3_Z, %T3_Y, %T3_X, %T2_W, %T2_Y, %T2_X, %T1_W, %T1_Z < < BB#0: derived from LLVM BB %main_body < Live Ins: %T4_W %T4_Z %T4_Y %T4_X %T3_W %T3_Z %T3_Y %T3_X %T2_W %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X < CF_CALL_FS_R600 < CF_ALU 8, 0, 0, 2, 0, 0, 0, 111, 1 < R600_ExportSwz %T11_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 < R600_ExportSwz %T0_XYZW, 2, 0, 0, 1, 2, 3, 39, 0 < R600_ExportSwz %T12_XYZW, 2, 1, 0, 1, 2, 3, 39, 0 < R600_ExportSwz %T9_XYZW, 2, 2, 0, 1, 2, 4, 39, 0 < R600_ExportSwz %T14_XYZW, 2, 3, 0, 1, 2, 4, 40, 1 < CF_END_R600 < ALU_CLAUSE 8 < %T0_W = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW < %T2_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %KC0_139_X, 0, 0, 0, 2092, 1, pred < %T0_X = MULADD_IEEE_r600 0, 0, %KC0_139_Z, 0, 0, 2094, %PV_W, 0, 0, -1, %PS, 0, 0, -1, 0, < %T2_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %KC0_139_Y, 0, 0, 0, 2093, 1, pred < %T0_Y = MULADD_IEEE_r600 0, 0, %KC0_139_W, 0, 0, 2095, %T0_W, 0, 0, -1, %PV_W, 0, 0, -1, < %T0_Z = MOV 1, 0, 0, 0, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW < %T2_X = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_136_X, 0, 0, 0, 2080, %T3_X, 0, 0, 0, -1, 0, pred:%PRE < %T2_Y = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Y, 0, 0, 0, 2081, %T3_Y, 0, 0, 0, -1, 0, pred:%PRE < %T2_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Z, 0, 0, 0, 2082, %T3_Z, 0, 0, 0, -1, 0, pred:%PRE < %T2_W = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O < %T2_X = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %T3_X, 0, 0, 0, -1, 0, pred:%PRE < %T2_Y = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %T3_Y, 0, 0, 0, -1, 0, pred:%PRE < %T2_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %T3_Z, 0, 0, 0, -1, 0, pred:%PRE < %T2_W = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O < %T2_X = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %T3_X, 0, 0, 0, -1, 0, pred:%PRE < %T2_Y = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %T3_Y, 0, 0, 0, -1, 0, pred:%PRE < %T2_Z = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %T3_Z, 0, 0, 0, -1, 0, pred:%PRE < %T2_W = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O < %T3_X = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_136_X, 0, 0, 0, 2080, %T4_X, 0, 0, 0, -1, 0, pred:%PRE < %T3_Y = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Y, 0, 0, 0, 2081, %T4_Y, 0, 0, 0, -1, 0, pred:%PRE < %T3_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Z, 0, 0, 0, 2082, %T4_Z, 0, 0, 0, -1, 0, pred:%PRE < %T3_W = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O < %T3_X = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %T4_X, 0, 0, 0, -1, 0, pred:%PRE < %T3_Y = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %T4_Y, 0, 0, 0, -1, 0, pred:%PRE < %T3_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %T4_Z, 0, 0, 0, -1, 0, pred:%PRE < %T3_W = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O < %T3_X = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %T4_X, 0, 0, 0, -1, 0, pred:%PRE < %T3_Y = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %T4_Y, 0, 0, 0, -1, 0, pred:%PRE < %T3_Z = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %T4_Z, 0, 0, 0, -1, 0, pred:%PRE < %T3_W = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O < %T2_X = DOT4_r600 0, 0, 0, 0, 0, 0, %T3_X, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_O < %T2_Y = DOT4_r600 0, 0, 0, 0, 0, 0, %T3_Y, 0, 0, 0, -1, %T3_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_O < %T2_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %T3_Z, 0, 0, 0, -1, %T3_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_O < %T2_W = DOT4_r600 0, 0, 1, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O < %T2_W = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %PV_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, < %T3_X = DOT4_r600 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_O < %T3_Y = DOT4_r600 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T2_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_O < %T3_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T2_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_O < %T3_W = DOT4_r600 0, 0, 1, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O < %T3_W = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %PV_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, < %T5_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %PS, 0, 0, 0, -1, 0, pred:%PRED_SE < %T6_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_Y, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_ < %T3_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 0, pred:%PRED_ < %T2_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_W, 0, 0, 0, -1, %PS, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, < %T7_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_Z, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 0, pred:%PRED_ < %T3_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 1, pred: < %T2_Z = MULADD_IEEE_r600 0, 0, %PS, 0, 0, -1, %PV_W, 0, 0, -1, %PV_Z, 1, 0, -1, 0, pred:% < %T8_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_Y, 0, 0, 0, -1, %PV_W, 0, 0, 0, -1, 0, pred:%PRED_SEL_OF < %T2_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_X, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred: < %T3_Z = MULADD_IEEE_r600 0, 0, %T5_W, 0, 0, -1, %PS, 0, 0, -1, %PV_W, 1, 0, -1, 0, pred:% < %T8_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_Z, 0, 0, 0, -1, %T4_W, 0, 0, 0, -1, 0, pred:%PRED_ < %T9_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_W, 0, 0, 0, -1, %PS, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, < %T2_Z = MULADD_IEEE_r600 0, 0, %T3_Y, 0, 0, -1, %T6_W, 0, 0, -1, %PS, 1, 0, -1, 0, pred:% < %T9_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, %PV_W, 0, 0, 0, -1, 0, pred:%PRED < %T10_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_Z, 0, 0, 0, -1, %T4_W, 0, 0, 0, -1, 1, pred:%PRED < %T9_W = MULADD_IEEE_r600 0, 0, %KC0_133_X, 0, 0, 2068, %PS, 0, 0, -1, %PV_W, 0, 0, -1, 0, < %T4_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_Z, 0, 0, 0, -1, %T4_W, 0, 0, 0, -1, 1, pred: < %T9_X = MULADD_IEEE_r600 0, 0, %KC0_134_X, 0, 0, 2072, %PS, 0, 0, -1, %PV_W, 0, 0, -1, 0, < %T11_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, %T2_W, 0, 0, 0, -1, 1, pred:%PRE < %T2_X = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_136_X, 0, 0, 0, 2080, %T1_X, 0, 0, 0, -1, 0, pred:%PRE < %T2_Y = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Y, 0, 0, 0, 2081, %T1_Y, 0, 0, 0, -1, 0, pred:%PRE < %T2_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Z, 0, 0, 0, 2082, %T1_Z, 0, 0, 0, -1, 0, pred:%PRE < %T2_W = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_W, 0, 0, 0, 2083, %T1_W, 0, 0, 0, -1, 1, pred:%PRE < %T2_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_128_X, 0, 0, 0, 2048, %PV_X, 0, 0, 0, -1, 0, pred:%PRED < %T11_W = MULADD_IEEE_r600 0, 0, %KC0_133_X, 0, 0, 2068, %T6_W, 0, 0, -1, %T11_W, 0, 0, -1 < %T12_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_W, 0, 0, 0, 2067, %T2_W, 0, 0, 0, -1, 1, pred:%PRE < %T12_W = MULADD_IEEE_r600 0, 0, %KC0_133_W, 0, 0, 2071, %T6_W, 0, 0, -1, %PV_W, 0, 0, -1, < %T2_X = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %T1_X, 0, 0, 0, -1, 0, pred:%PRE < %T2_Y = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %T1_Y, 0, 0, 0, -1, 0, pred:%PRE < %T2_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %T1_Z, 0, 0, 0, -1, 0, pred:%PRE < %T2_W = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_W, 0, 0, 0, 2087, %T1_W, 0, 0, 0, -1, 1, pred:%PRE < %T12_X = MULADD_IEEE_r600 0, 0, %KC0_134_X, 0, 0, 2072, %T7_W, 0, 0, -1, %T11_W, 0, 0, -1 < %T11_W = MULADD_IEEE_r600 0, 0, %KC0_129_X, 0, 0, 2052, %PV_X, 0, 0, -1, %T2_Z, 0, 0, -1, < %T13_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, %T2_W, 0, 0, 0, -1, 0, pred:%PRE < %T14_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_128_Y, 0, 0, 0, 2049, %T2_X, 0, 0, 0, -1, 1, pred:%PRE < %T1_X = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %T1_X, 0, 0, 0, -1, 0, pred:%PRE < %T1_Y = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %T1_Y, 0, 0, 0, -1, 0, pred:%PRE < %T1_Z = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %T1_Z, 0, 0, 0, -1, 0, pred:%PRE < %T1_W = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_W, 0, 0, 0, 2091, %T1_W, 0, 0, 0, -1, 1, pred:%PRE < %T1_Z = MULADD_IEEE_r600 0, 0, %KC0_129_Y, 0, 0, 2053, %T2_Y, 0, 0, -1, %T14_W, 0, 0, -1, < %T1_W = MULADD_IEEE_r600 0, 0, %KC0_133_Y, 0, 0, 2069, %T6_W, 0, 0, -1, %T13_W, 0, 0, -1, < %T11_W = MULADD_IEEE_r600 0, 0, %KC0_130_X, 0, 0, 2056, %T1_X, 0, 0, -1, %T11_W, 0, 0, -1 < %T11_X = ADD 0, 0, 1, 0, 0, 0, %PV_W, 0, 0, 0, -1, %KC0_131_X, 0, 0, 0, 2060, 0, pred:%PR < %T12_Y = MULADD_IEEE_r600 0, 0, %KC0_134_Y, 0, 0, 2073, %T7_W, 0, 0, -1, %T1_W, 0, 0, -1, < %T1_W = MULADD_IEEE_r600 0, 0, %KC0_130_Y, 0, 0, 2057, %T1_X, 0, 0, -1, %T1_Z, 0, 0, -1, < %T11_Y = ADD 0, 0, 1, 0, 0, 0, %PV_W, 0, 0, 0, -1, %KC0_131_Y, 0, 0, 0, 2061, 0, pred:%PR < %T1_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, %T3_Y, 0, 0, 0, -1, 0, pred:%PRED < %T13_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, %T8_W, 0, 0, 0, -1, 1, pred:%PRE < %T1_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, %T3_Y, 0, 0, 0, -1, 0, pred:%PRED < %T13_W = MULADD_IEEE_r600 0, 0, %KC0_133_Y, 0, 0, 2069, %T10_W, 0, 0, -1, %PS, 0, 0, -1, < %T1_W = MULADD_IEEE_r600 0, 0, %KC0_133_X, 0, 0, 2068, %T3_W, 0, 0, -1, %PV_W, 0, 0, -1, < %T14_X = MULADD_IEEE_r600 0, 0, %KC0_134_X, 0, 0, 2072, %T5_W, 0, 0, -1, %PS, 0, 0, -1, 0 < %T9_Y = MULADD_IEEE_r600 0, 0, %KC0_134_Y, 0, 0, 2073, %T4_W, 0, 0, -1, %PV_W, 0, 0, -1, < %T1_W = MULADD_IEEE_r600 0, 0, %KC0_133_Y, 0, 0, 2069, %T3_W, 0, 0, -1, %PV_Z, 0, 0, -1, < %T14_Y = MULADD_IEEE_r600 0, 0, %KC0_134_Y, 0, 0, 2073, %T5_W, 0, 0, -1, %PV_W, 0, 0, -1, < %T1_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, %T3_Y, 0, 0, 0, -1, 1, pred < %T13_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_128_Z, 0, 0, 0, 2050, %T2_X, 0, 0, 0, -1, 1, pred:%PRE < %T1_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, %T2_W, 0, 0, 0, -1, 0, pred < %T1_Z = MULADD_IEEE_r600 0, 0, %KC0_129_Z, 0, 0, 2054, %T2_Y, 0, 0, -1, %PV_W, 0, 0, -1, < %T2_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, %T8_W, 0, 0, 0, -1, 1, pred < %T1_W = MULADD_IEEE_r600 0, 0, %KC0_133_Z, 0, 0, 2070, %T3_W, 0, 0, -1, %T1_W, 0, 0 < %T14_Z = MULADD_IEEE_r600 0, 0, %KC0_134_Z, 0, 0, 2074, %T5_W, 0, 0, -1, %PV_W, 0, < %T1_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_128_W, 0, 0, 0, 2051, %T2_X, 0, 0, 0, -1, 1, pred < %T2_W = MULADD_IEEE_r600 0, 0, %KC0_133_Z, 0, 0, 2070, %T10_W, 0, 0, -1, %T2_W, 0, < %T9_Z = MULADD_IEEE_r600 0, 0, %KC0_134_Z, 0, 0, 2074, %T4_W, 0, 0, -1, %PV_W, 0, 0 < %T1_W = MULADD_IEEE_r600 0, 0, %KC0_129_W, 0, 0, 2055, %T2_Y, 0, 0, -1, %T1_W, 0, 0 < %T2_W = MULADD_IEEE_r600 0, 0, %KC0_130_Z, 0, 0, 2058, %T1_X, 0, 0, -1, %T1_Z, 0, 0, -1, < %T11_Z = ADD 0, 0, 1, 0, 0, 0, %PV_W, 0, 0, 0, -1, %KC0_131_Z, 0, 0, 0, 2062, 0, pred:%PR < %T1_W = MULADD_IEEE_r600 0, 0, %KC0_130_W, 0, 0, 2059, %T1_X, 0, 0, -1, %T1_W, 0, 0 < %T2_W = MULADD_IEEE_r600 0, 0, %KC0_133_Z, 0, 0, 2070, %T6_W, 0, 0, -1, %T1_Y, 0, 0 < %T12_Z = MULADD_IEEE_r600 0, 0, %KC0_134_Z, 0, 0, 2074, %T7_W, 0, 0, -1, %PV_W, 0, < %T11_W = ADD 0, 0, 1, 0, 0, 0, %T1_W, 0, 0, 0, -1, %KC0_131_W, 0, 0, 0, 2063, 1, pred:%PR < < # End machine code for function main. < < < Shader Disassembly: < < CALL_FS ; 00000000 89800000 < ALU 111, @8, KC0[CB0:0-32], KC1[] ; 80000008 A1BC0000 < EXPORT T11.XYZW ; C005A03C 94000688 < EXPORT T0.XYZW ; C0004000 93800688 < EXPORT T12.XYZW ; C0064001 93800688 < EXPORT T9.XYZ0 ; C004C002 93800888 < EXPORT T14.XYZ0 ; C0074003 94200888 < CF_END ; 00000000 80200000 < ALU clause starting at 8: ; < MOV T0.W, T2.W, ; 00000C02 60001910 < MUL_IEEE * T2.W, T2.X, KC0[11].X, ; 80116002 60400210 < MULADD_IEEE T0.X, KC0[11].Z, PV.W, PS, ; 019FC88B 000280FF < MUL_IEEE * T2.W, T2.Y, KC0[11].Y, ; 80916402 60400210 < MULADD_IEEE T0.Y, KC0[11].W, T0.W, PV.W, ; 01800C8B 20028CFE < MOV * T0.Z, T3.W, ; 80000C03 40001910 < DOT4 T2.X, KC0[8].X, T3.X, ; 00006088 00405010 < DOT4 T2.Y (MASKED), KC0[8].Y, T3.Y, ; 00806488 20405000 < DOT4 T2.Z (MASKED), KC0[8].Z, T3.Z, ; 01006888 40405000 < DOT4 * T2.W (MASKED), 0.0, 0.0, ; 801F00F8 60405000 < DOT4 T2.X (MASKED), KC0[9].X, T3.X, ; 00006089 00405000 < DOT4 T2.Y, KC0[9].Y, T3.Y, ; 00806489 20405010 < DOT4 T2.Z (MASKED), KC0[9].Z, T3.Z, ; 01006889 40405000 < DOT4 * T2.W (MASKED), 0.0, 0.0, ; 801F00F8 60405000 < DOT4 T2.X (MASKED), KC0[10].X, T3.X, ; 0000608A 00405000 < DOT4 T2.Y (MASKED), KC0[10].Y, T3.Y, ; 0080648A 20405000 < DOT4 T2.Z, KC0[10].Z, T3.Z, ; 0100688A 40405010 < DOT4 * T2.W (MASKED), 0.0, 0.0, ; 801F00F8 60405000 < DOT4 T3.X, KC0[8].X, T4.X, ; 00008088 00605010 < DOT4 T3.Y (MASKED), KC0[8].Y, T4.Y, ; 00808488 20605000 < DOT4 T3.Z (MASKED), KC0[8].Z, T4.Z, ; 01008888 40605000 < DOT4 * T3.W (MASKED), 0.0, 0.0, ; 801F00F8 60605000 < DOT4 T3.X (MASKED), KC0[9].X, T4.X, ; 00008089 00605000 < DOT4 T3.Y, KC0[9].Y, T4.Y, ; 00808489 20605010 < DOT4 T3.Z (MASKED), KC0[9].Z, T4.Z, ; 01008889 40605000 < DOT4 * T3.W (MASKED), 0.0, 0.0, ; 801F00F8 60605000 < DOT4 T3.X (MASKED), KC0[10].X, T4.X, ; 0000808A 00605000 < DOT4 T3.Y (MASKED), KC0[10].Y, T4.Y, ; 0080848A 20605000 < DOT4 T3.Z, KC0[10].Z, T4.Z, ; 0100888A 40605010 < DOT4 * T3.W (MASKED), 0.0, 0.0, ; 801F00F8 60605000 < DOT4 T2.X (MASKED), T3.X, T3.X, ; 00006003 00405000 < DOT4 T2.Y (MASKED), T3.Y, T3.Y, ; 00806403 20405000 < DOT4 T2.Z (MASKED), T3.Z, T3.Z, ; 01006803 40405000 < DOT4 * T2.W, 0.0, 0.0, ; 801F00F8 60405010 < RECIPSQRT_CLAMPED * T2.W, PV.X, ; 800000FE 60406710 < DOT4 T3.X (MASKED), T2.X, T2.X, ; 00004002 00605000 < DOT4 T3.Y (MASKED), T2.Y, T2.Y, ; 00804402 20605000 < DOT4 T3.Z (MASKED), T2.Z, T2.Z, ; 01004802 40605000 < DOT4 * T3.W, 0.0, 0.0, ; 801F00F8 60605010 < RECIPSQRT_CLAMPED * T3.W, PV.X, ; 800000FE 60606710 < MUL_IEEE T5.W, T2.Z, PS, ; 001FE802 60A00210 < MUL_IEEE * T6.W, T3.Y, T2.W, ; 81804403 60C00210 < MUL_IEEE T3.Y, T2.X, T3.W, ; 01806002 20600210 < MUL_IEEE T2.Z, PV.W, PS, ; 001FECFE 40400210 < MUL_IEEE T7.W, T3.Z, T2.W, BS:VEC_021/SCL_122 ; 01804803 60E40210 < MUL_IEEE * T3.W, T2.Y, T3.W, ; 81806402 60600210 < MULADD_IEEE T2.Z, PS, PV.W, -PV.Z, ; 019FC0FF 404298FE < MUL_IEEE T8.W, PV.Y, PV.W, ; 019FC4FE 61000210 < MUL_IEEE * T2.W, T3.X, T2.W, ; 81804003 60400210 < MULADD_IEEE T3.Z, T5.W, PS, -PV.W, ; 001FEC05 40629CFE < MUL_IEEE T8.W, PV.Z, T4.W, ; 018088FE 61000210 < MUL_IEEE * T9.W, T3.W, PS, ; 801FEC03 61200210 < MULADD_IEEE T2.Z, T3.Y, T6.W, -PS, BS:VEC_021/SCL_122 ; 0180C403 404690FF < MUL_IEEE T9.W, KC0[4].X, PV.W, ; 019FC084 61200210 < MUL_IEEE * T10.W, PV.Z, T4.W, ; 818088FE 61400210 < MULADD_IEEE T9.W, KC0[5].X, PS, PV.W, ; 001FE085 61228CFE < MUL_IEEE * T4.W, PV.Z, T4.W, ; 818088FE 60800210 < MULADD_IEEE T9.X, KC0[6].X, PS, PV.W, ; 001FE086 01228CFE < MUL_IEEE * T11.W, KC0[4].X, T2.W, ; 81804084 61600210 < DOT4 T2.X, KC0[8].X, T1.X, ; 00002088 00405010 < DOT4 T2.Y (MASKED), KC0[8].Y, T1.Y, ; 00802488 20405000 < DOT4 T2.Z (MASKED), KC0[8].Z, T1.Z, ; 01002888 40405000 < DOT4 * T2.W (MASKED), KC0[8].W, T1.W, ; 81802C88 60405000 < MUL_IEEE T2.Z, KC0[0].X, PV.X, ; 001FC080 40400210 < MULADD_IEEE * T11.W, KC0[5].X, T6.W, T11.W, ; 8180C085 61628C0B < MUL_IEEE * T12.W, KC0[4].W, T2.W, ; 81804C84 61800210 < MULADD_IEEE * T12.W, KC0[5].W, T6.W, PV.W, ; 8180CC85 61828CFE < DOT4 T2.X (MASKED), KC0[9].X, T1.X, ; 00002089 00405000 < DOT4 T2.Y, KC0[9].Y, T1.Y, ; 00802489 20405010 < DOT4 T2.Z (MASKED), KC0[9].Z, T1.Z, ; 01002889 40405000 < DOT4 * T2.W (MASKED), KC0[9].W, T1.W, ; 81802C89 60405000 < MULADD_IEEE T12.X, KC0[6].X, T7.W, T11.W, ; 0180E086 01828C0B < MULADD_IEEE * T11.W, KC0[1].X, PV.X, T2.Z, ; 801FC081 61628802 < MUL_IEEE T13.W, KC0[4].Y, T2.W, ; 01804484 61A00210 < MUL_IEEE * T14.W, KC0[0].Y, T2.X, ; 80004480 61C00210 < DOT4 T1.X, KC0[10].X, T1.X, ; 0000208A 00205010 < DOT4 T1.Y (MASKED), KC0[10].Y, T1.Y, ; 0080248A 20205000 < DOT4 T1.Z (MASKED), KC0[10].Z, T1.Z, ; 0100288A 40205000 < DOT4 * T1.W (MASKED), KC0[10].W, T1.W, ; 81802C8A 60205000 < MULADD_IEEE T1.Z, KC0[1].Y, T2.Y, T14.W, ; 00804481 40228C0E < MULADD_IEEE * T1.W, KC0[5].Y, T6.W, T13.W, BS:VEC_201 ; 8180C485 60328C0D < MULADD_IEEE * T11.W, KC0[2].X, T1.X, T11.W, ; 80002082 61628C0B < ADD T11.X, PV.W, KC0[3].X, ; 00106CFE 01600010 < MULADD_IEEE * T12.Y, KC0[6].Y, T7.W, T1.W, ; 8180E486 21828C01 < MULADD_IEEE * T1.W, KC0[2].Y, T1.X, T1.Z, ; 80002482 60228801 < ADD T11.Y, PV.W, KC0[3].Y, ; 00906CFE 21600010 < MUL_IEEE T1.W, KC0[4].X, T3.Y, ; 00806084 60200210 < MUL_IEEE * T13.W, KC0[4].Y, T8.W, ; 81810484 61A00210 < MUL_IEEE T1.Z, KC0[4].Y, T3.Y, ; 00806484 40200210 < MULADD_IEEE T13.W, KC0[5].Y, T10.W, PS, ; 01814485 61A280FF < MULADD_IEEE * T1.W, KC0[5].X, T3.W, PV.W, BS:VEC_021/SCL_122 ; 81806085 60268CFE < MULADD_IEEE T14.X, KC0[6].X, T5.W, PS, ; 0180A086 01C280FF < MULADD_IEEE T9.Y, KC0[6].Y, T4.W, PV.W, BS:VEC_021/SCL_122 ; 01808486 21268CFE < MULADD_IEEE * T1.W, KC0[5].Y, T3.W, PV.Z, BS:VEC_102/SCL_221 ; 81806485 602E88FE < MULADD_IEEE T14.Y, KC0[6].Y, T5.W, PV.W, ; 0180A486 21C28CFE < MUL_IEEE * T1.W, KC0[4].Z, T3.Y, ; 80806884 60200210 < MUL_IEEE * T13.W, KC0[0].Z, T2.X, ; 80004880 61A00210 < MUL_IEEE T1.Y, KC0[4].Z, T2.W, ; 01804884 20200210 < MULADD_IEEE T1.Z, KC0[1].Z, T2.Y, PV.W, ; 00804881 40228CFE < MUL_IEEE * T2.W, KC0[4].Z, T8.W, BS:VEC_021/SCL_122 ; 81810884 60440210 < MULADD_IEEE * T1.W, KC0[5].Z, T3.W, T1.W, ; 81806885 60228C01 < MULADD_IEEE T14.Z, KC0[6].Z, T5.W, PV.W, ; 0180A886 41C28CFE < MUL_IEEE * T1.W, KC0[0].W, T2.X, ; 80004C80 60200210 < MULADD_IEEE * T2.W, KC0[5].Z, T10.W, T2.W, ; 81814885 60428C02 < MULADD_IEEE T9.Z, KC0[6].Z, T4.W, PV.W, ; 01808886 41228CFE < MULADD_IEEE * T1.W, KC0[1].W, T2.Y, T1.W, ; 80804C81 60228C01 < MULADD_IEEE * T2.W, KC0[2].Z, T1.X, T1.Z, ; 80002882 60428801 < ADD T11.Z, PV.W, KC0[3].Z, ; 01106CFE 41600010 < MULADD_IEEE * T1.W, KC0[2].W, T1.X, T1.W, ; 80002C82 60228C01 < MULADD_IEEE * T2.W, KC0[5].Z, T6.W, T1.Y, ; 8180C885 60428401 < MULADD_IEEE T12.Z, KC0[6].Z, T7.W, PV.W, ; 0180E886 41828CFE < ADD * T11.W, T1.W, KC0[3].W, ; 81906C01 61600010 < <