>
													     >
===== SHADER #200 ========================================== PS/RS880/R600 =====				===== SHADER #200 ========================================== PS/RS880/R600 =====
===== 128 dw ===== 7 gprs ===== 0 stack ========================================			     |	===== 128 dw ===== 8 gprs ===== 0 stack ========================================
0000  0000000e a0080000 ALU 3 @28										0000  0000000e a0080000 ALU 3 @28
 0028  80000c00 60006610     1      t: RECIP_IEEE         R0.w,  R0.w					     |	 0028  80000c00 00806610     1      t: RECIP_IEEE         R4.x,  R0.w
 0030  001fe000 00800210     2      x: MUL_IEEE           R4.x,  R0.x, PS				     |	 0030  001fe000 00a00210     2      x: MUL_IEEE           R5.x,  R0.x, PS
 0032  801fe400 20800210            y: MUL_IEEE           R4.y,  R0.y, PS				     |	 0032  801fe400 20a00210            y: MUL_IEEE           R5.y,  R0.y, PS
0002  00000008 80800000 TEX 1 @16										0002  00000008 80800000 TEX 1 @16
 0016  00041110 f00d1005 fc808000 SAMPLE              R5.xyzw, R4.xy__,   RID:17, SID:1 CT:NNNN		     |	 0016  00051110 f00d1004 fc808000 SAMPLE              R4.xyzw, R5.xy__,   RID:17, SID:1 CT:NNNN
0004  00000011 a0800000 ALU 33 @34										0004  00000011 a0800000 ALU 33 @34
 0034  0080a405 60000010     3      w: ADD                R0.w,  R5.y, R5.y				     |	 0034  00808404 60c00010     3      w: ADD                R6.w,  R4.y, R4.y
 0036  8000a005 60200010            t: ADD                R1.w,  R5.x, R5.x				     |	 0036  80008004 60800010            t: ADD                R4.w,  R4.x, R4.x
 0038  001fa0ff 00000010     4      x: ADD                R0.x,  PS, [0xbf800000 -1].x			     |	 0038  001fa0ff 00800010     4      x: ADD                R4.x,  PS, [0xbf800000 -1].x
 0040  801facfe 20000010            y: ADD                R0.y,  PV.w, [0xbf800000 -1].x		     |	 0040  801facfe 20800010            y: ADD                R4.y,  PV.w, [0xbf800000 -1].x
 0042  bf800000 												 0042  bf800000 
 0044  00000000 00005000     5      x: DOT4               __.x,  R0.x, R0.x				     |	 0044  00008004 00805000     5      x: DOT4               __.x,  R4.x, R4.x
 0046  00800400 20005000            y: DOT4               __.y,  R0.y, R0.y				     |	 0046  00808404 20805000            y: DOT4               __.y,  R4.y, R4.y
 0048  001f00f8 40005000            z: DOT4               __.z,  0, 0					     |	 0048  001f00f8 40805010            z: DOT4               R4.z,  0, 0
 0050  801f00f8 60005010            w: DOT4               R0.w,  0, 0					     |	 0050  801f00f8 60805000            w: DOT4               __.w,  0, 0
 0052  801f30fe e0000010     6      w: ADD_sat            R0.w,  -PV.x, 1.0				     |	 0052  801f30fe e0800010     6      w: ADD_sat            R4.w,  -PV.x, 1.0
 0054  80000cfe 60206710     7      t: RECIPSQRT_CLAMPED  R1.w,  PV.w					     |	 0054  80000cfe 40806710     7      t: RECIPSQRT_CLAMPED  R4.z,  PV.w
 0056  00000001 60400210     8      w: MUL_IEEE           R2.w,  R1.x, R0.x				     |	 0056  00008001 60c00210     8      w: MUL_IEEE           R6.w,  R1.x, R4.x
 0058  818000ff 60200210            t: MUL_IEEE           R1.w,  PS, R0.w				     |	 0058  818080ff 60e00210            t: MUL_IEEE           R7.w,  PS, R4.w
 0060  00000401 40a00210     9      z: MUL_IEEE           R5.z,  R1.y, R0.x				     |	 0060  00008401 40800210     9      z: MUL_IEEE           R4.z,  R1.y, R4.x
 0062  001f1c00 600340ff            w: CNDGE              R0.w,  -R0.w, 0, PS				     |	 0062  001f1c04 608340ff            w: CNDGE              R4.w,  -R4.w, 0, PS
 0064  80800002 60228cfe            t: MULADD_IEEE        R1.w,  R2.x, R0.y, PV.w			     |	 0064  80808002 60c28cfe            t: MULADD_IEEE        R6.w,  R2.x, R4.y, PV.w
 0066  019fc003 00c280ff    10      x: MULADD_IEEE        R6.x,  R3.x, PV.w, PS					 0066  019fc003 00c280ff    10      x: MULADD_IEEE        R6.x,  R3.x, PV.w, PS
 0068  00000801 60200210            w: MUL_IEEE           R1.w,  R1.z, R0.x				     |	 0068  00008801 60200210            w: MUL_IEEE           R1.w,  R1.z, R4.x
 0070  80800402 604288fe            t: MULADD_IEEE        R2.w,  R2.y, R0.y, PV.z			     |	 0070  80808402 60e288fe            t: MULADD_IEEE        R7.w,  R2.y, R4.y, PV.z
 0072  01800403 20c280ff    11      y: MULADD_IEEE        R6.y,  R3.y, R0.w, PS				     |	 0072  01808403 20c280ff    11      y: MULADD_IEEE        R6.y,  R3.y, R4.w, PS
 0074  80800802 60228cfe            w: MULADD_IEEE        R1.w,  R2.z, R0.y, PV.w			     |	 0074  80808802 60228cfe            w: MULADD_IEEE        R1.w,  R2.z, R4.y, PV.w
 0076  01800803 40c28cfe    12      z: MULADD_IEEE        R6.z,  R3.z, R0.w, PV.w			     |	 0076  01808803 40c28cfe    12      z: MULADD_IEEE        R6.z,  R3.z, R4.w, PV.w
 0078  800000f8 60c01910            w: MOV                R6.w,  0						 0078  800000f8 60c01910            w: MOV                R6.w,  0
 0080  0080c806 00205210    13      x: CUBE               R1.x,  R6.z, R6.y					 0080  0080c806 00205210    13      x: CUBE               R1.x,  R6.z, R6.y
 0082  0000c806 20205210            y: CUBE               R1.y,  R6.z, R6.x					 0082  0000c806 20205210            y: CUBE               R1.y,  R6.z, R6.x
 0084  0100c006 40205210            z: CUBE               R1.z,  R6.x, R6.z					 0084  0100c006 40205210            z: CUBE               R1.z,  R6.x, R6.z
 0086  8100c406 60205210            w: CUBE               R1.w,  R6.y, R6.z					 0086  8100c406 60205210            w: CUBE               R1.w,  R6.y, R6.z
 0088  800008fe 00006611    14      t: RECIP_IEEE         R0.x,  |PV.z|					     |	 0088  800008fe 00406611    14      t: RECIP_IEEE         R2.x,  |PV.z|
 0090  801fe001 204280fd    15      y: MULADD_IEEE        R2.y,  R1.x, PS, [0x3fc00000 1.5].x		     |	 0090  801fe001 206280fd    15      y: MULADD_IEEE        R3.y,  R1.x, PS, [0x3fc00000 1.5].x
 0092  3fc00000 												 0092  3fc00000 
 0094  00000401 004280fd    16      x: MULADD_IEEE        R2.x,  R1.y, R0.x, [0x3fc00000 1.5].x		     |	 0094  00004401 006280fd    16      x: MULADD_IEEE        R3.x,  R1.y, R2.x, [0x3fc00000 1.5].x
 0096  80000c01 60401910            w: MOV                R2.w,  R1.w					     |	 0096  80000c01 60601910            w: MOV                R3.w,  R1.w
 0098  3fc00000 												 0098  3fc00000 
0006  0000000a 80800400 TEX 2 @20										0006  0000000a 80800400 TEX 2 @20
 0020  00021210 f00d1001 0c810000 SAMPLE              R1.xyzw, R2.xywx,   RID:18, SID:2 CT:NNNN		     |	 0020  00031210 f00d1001 0c810000 SAMPLE              R1.xyzw, R3.xywx,   RID:18, SID:2 CT:NNNN
 0024  00041010 f00d1002 fc800000 SAMPLE              R2.xyzw, R4.xy__,   RID:16, SID:0 CT:NNNN		     |	 0024  00051010 f00d1002 fc800000 SAMPLE              R2.xyzw, R5.xy__,   RID:16, SID:0 CT:NNNN
0008  80000032 a0340000 ALU 14 @100 KC0[CB0:0-31]								0008  80000032 a0340000 ALU 14 @100 KC0[CB0:0-31]
 0100  00002002 60000210    17      w: MUL_IEEE           R0.w,  R2.x, R1.x				     |	 0100  00002002 60600210    17      w: MUL_IEEE           R3.w,  R2.x, R1.x
 0102  80100002 60600210            t: MUL_IEEE           R3.w,  R2.x, KC0[0].x				     |	 0102  80100002 60800210            t: MUL_IEEE           R4.w,  R2.x, KC0[0].x
 0104  00802402 40600210    18      z: MUL_IEEE           R3.z,  R2.y, R1.y					 0104  00802402 40600210    18      z: MUL_IEEE           R3.z,  R2.y, R1.y
 0106  00900402 60800210            w: MUL_IEEE           R4.w,  R2.y, KC0[0].y				     |	 0106  00900402 60a00210            w: MUL_IEEE           R5.w,  R2.y, KC0[0].y
 0108  80108cfe 600680ff            t: MULADD_IEEE        R0.w,  PV.w, KC0[4].x, PS     SCL_122		     |	 0108  80108cfe 606680ff            t: MULADD_IEEE        R3.w,  PV.w, KC0[4].x, PS     SCL_122
 0110  0010a0ff 00a00210    19      x: MUL_IEEE           R5.x,  PS, KC0[5].x				     |	 0110  0010a0ff 00800210    19      x: MUL_IEEE           R4.x,  PS, KC0[5].x
 0112  01002802 40200210            z: MUL_IEEE           R1.z,  R2.z, R1.z					 0112  01002802 40200210            z: MUL_IEEE           R1.z,  R2.z, R1.z
 0114  81100802 60000210            w: MUL_IEEE           R0.w,  R2.z, KC0[0].z				     |	 0114  81100802 60200210            w: MUL_IEEE           R1.w,  R2.z, KC0[0].z
 0116  80108803 60228c04    20      w: MULADD_IEEE        R1.w,  R3.z, KC0[4].x, R4.w			     |	 0116  80108803 60628c05    20      w: MULADD_IEEE        R3.w,  R3.z, KC0[4].x, R5.w
 0118  0090acfe 20a00210    21      y: MUL_IEEE           R5.y,  PV.w, KC0[5].y				     |	 0118  0090acfe 20800210    21      y: MUL_IEEE           R4.y,  PV.w, KC0[5].y
 0120  80108801 60028c00            w: MULADD_IEEE        R0.w,  R1.z, KC0[4].x, R0.w			     |	 0120  80108801 60228c01            w: MULADD_IEEE        R1.w,  R1.z, KC0[4].x, R1.w
 0122  0110acfe 40a00210    22      z: MUL_IEEE           R5.z,  PV.w, KC0[5].z				     |	 0122  0110acfe 40800210    22      z: MUL_IEEE           R4.z,  PV.w, KC0[5].z
 0124  81000c02 60000210            w: MUL_IEEE           R0.w,  R2.w, R0.z					 0124  81000c02 60000210            w: MUL_IEEE           R0.w,  R2.w, R0.z
 0126  8190acfe 60a00210    23      w: MUL_IEEE           R5.w,  PV.w, KC0[5].w				     |	 0126  8190acfe 60800210    23      w: MUL_IEEE           R4.w,  PV.w, KC0[5].w
0010  c0028000 94200688 EXPORT_DONE        PIXEL 0     R5.xyzw  EOP					     |	0010  c0020000 94200688 EXPORT_DONE        PIXEL 0     R4.xyzw  EOP
===== SHADER_END ===============================================================				===== SHADER_END ===============================================================


===== SHADER #200 OPT ====================================== PS/RS880/R600 =====				===== SHADER #200 OPT ====================================== PS/RS880/R600 =====
===== 120 dw ===== 5 gprs ===== 0 stack ========================================				===== 120 dw ===== 5 gprs ===== 0 stack ========================================
0000  00000006 a0080000 ALU 3 @12										0000  00000006 a0080000 ALU 3 @12
 0012  80000c00 6f806610     1      t: RECIP_IEEE         T0.w,  R0.w						 0012  80000c00 6f806610     1      t: RECIP_IEEE         T0.w,  R0.w
 0014  018f8000 00000210     2      x: MUL_IEEE           R0.x,  R0.x, T0.w					 0014  018f8000 00000210     2      x: MUL_IEEE           R0.x,  R0.x, T0.w
 0016  818f8400 20000210            y: MUL_IEEE           R0.y,  R0.y, T0.w					 0016  818f8400 20000210            y: MUL_IEEE           R0.y,  R0.y, T0.w
0002  0000000a 80800000 TEX 1 @20										0002  0000000a 80800000 TEX 1 @20
 0020  00001110 f01f9004 fc808000 SAMPLE              R4.xy__, R0.xy__,   RID:17, SID:1 CT:NNNN			 0020  00001110 f01f9004 fc808000 SAMPLE              R4.xy__, R0.xy__,   RID:17, SID:1 CT:NNNN
0004  0000000c a0700000 ALU 29 @24										0004  0000000c a0700000 ALU 29 @24
 0024  801fa404 600204fd     3      w: MULADD             R0.w,  R4.y, [0x40000000 2].x, [0xbf800000 -1].y	 0024  801fa404 600204fd     3      w: MULADD             R0.w,  R4.y, [0x40000000 2].x, [0xbf800000 -1].y
 0026  40000000 												 0026  40000000 
 0027  bf800000 												 0027  bf800000 
 0028  801fa004 6fe204fd     4      w: MULADD             T3.w,  R4.x, [0x40000000 2].x, [0xbf800000 -1].y	 0028  801fa004 6fe204fd     4      w: MULADD             T3.w,  R4.x, [0x40000000 2].x, [0xbf800000 -1].y
 0030  40000000 												 0030  40000000 
 0031  bf800000 												 0031  bf800000 
 0032  018fec7f 00005000     5      x: DOT4               __.x,  T3.w, T3.w					 0032  018fec7f 00005000     5      x: DOT4               __.x,  T3.w, T3.w
 0034  01800c00 20085000            y: DOT4               __.y,  R0.w, R0.w             VEC_120			 0034  01800c00 20085000            y: DOT4               __.y,  R0.w, R0.w             VEC_120
 0036  001f00f8 40005000            z: DOT4               __.z,  0, 0						 0036  001f00f8 40005000            z: DOT4               __.z,  0, 0
 0038  801f00f8 6f805010            w: DOT4               T0.w,  0, 0						 0038  801f00f8 6f805010            w: DOT4               T0.w,  0, 0
 0040  801f3c7c efc00010     6      w: ADD_sat            T2.w,  -T0.w, 1.0					 0040  801f3c7c efc00010     6      w: ADD_sat            T2.w,  -T0.w, 1.0
 0042  80000c7e 6f806710     7      t: RECIPSQRT_CLAMPED  T0.w,  T2.w						 0042  80000c7e 6f806710     7      t: RECIPSQRT_CLAMPED  T0.w,  T2.w
 0044  018fe001 0f840210     8      x: MUL_IEEE           T0.x,  R1.x, T3.w             VEC_021			 0044  018fe001 0f840210     8      x: MUL_IEEE           T0.x,  R1.x, T3.w             VEC_021
 0046  018fcc7c 6fa00210            w: MUL_IEEE           T1.w,  T0.w, T2.w					 0046  018fcc7c 6fa00210            w: MUL_IEEE           T1.w,  T0.w, T2.w
 0048  818fe801 6f840210            t: MUL_IEEE           T0.w,  R1.z, T3.w             SCL_122			 0048  818fe801 6f840210            t: MUL_IEEE           T0.w,  R1.z, T3.w             SCL_122
 0050  01800002 0fa2807c     9      x: MULADD_IEEE        T1.x,  R2.x, R0.w, T0.x				 0050  01800002 0fa2807c     9      x: MULADD_IEEE        T1.x,  R2.x, R0.w, T0.x
 0052  018fe401 2f8c0210            y: MUL_IEEE           T0.y,  R1.y, T3.w             VEC_102			 0052  018fe401 2f8c0210            y: MUL_IEEE           T0.y,  R1.y, T3.w             VEC_102
 0054  81800802 6f828c7c            w: MULADD_IEEE        T0.w,  R2.z, R0.w, T0.w				 0054  81800802 6f828c7c            w: MULADD_IEEE        T0.w,  R2.z, R0.w, T0.w
 0056  01800402 0f82847c    10      x: MULADD_IEEE        T0.x,  R2.y, R0.w, T0.y				 0056  01800402 0f82847c    10      x: MULADD_IEEE        T0.x,  R2.y, R0.w, T0.y
 0058  801f1c7e 4f834c7d            z: CNDGE              T0.z,  -T2.w, 0, T1.w					 0058  801f1c7e 4f834c7d            z: CNDGE              T0.z,  -T2.w, 0, T1.w
 0060  010f8403 0f86807c    11      x: MULADD_IEEE        T0.x,  R3.y, T0.z, T0.x       VEC_021			 0060  010f8403 0f86807c    11      x: MULADD_IEEE        T0.x,  R3.y, T0.z, T0.x       VEC_021
 0062  010f8003 2f82807d            y: MULADD_IEEE        T0.y,  R3.x, T0.z, T1.x				 0062  010f8003 2f82807d            y: MULADD_IEEE        T0.y,  R3.x, T0.z, T1.x
 0064  810f8803 6f828c7c            w: MULADD_IEEE        T0.w,  R3.z, T0.z, T0.w				 0064  810f8803 6f828c7c            w: MULADD_IEEE        T0.w,  R3.z, T0.z, T0.w
 0066  000f8c7c 0f805210    12      x: CUBE               T0.x,  T0.w, T0.x					 0066  000f8c7c 0f805210    12      x: CUBE               T0.x,  T0.w, T0.x
 0068  008f8c7c 2f805210            y: CUBE               T0.y,  T0.w, T0.y					 0068  008f8c7c 2f805210            y: CUBE               T0.y,  T0.w, T0.y
 0070  018f847c 4f805210            z: CUBE               T0.z,  T0.y, T0.w					 0070  018f847c 4f805210            z: CUBE               T0.z,  T0.y, T0.w
 0072  818f807c 60205210            w: CUBE               R1.w,  T0.x, T0.w					 0072  818f807c 60205210            w: CUBE               R1.w,  T0.x, T0.w
 0074  8000087c 6f806611    13      t: RECIP_IEEE         T0.w,  |T0.z|						 0074  8000087c 6f806611    13      t: RECIP_IEEE         T0.w,  |T0.z|
 0076  018f847c 002280fd    14      x: MULADD_IEEE        R1.x,  T0.y, T0.w, [0x3fc00000 1.5].x			 0076  018f847c 002280fd    14      x: MULADD_IEEE        R1.x,  T0.y, T0.w, [0x3fc00000 1.5].x
 0078  818f807c 202280fd            y: MULADD_IEEE        R1.y,  T0.x, T0.w, [0x3fc00000 1.5].x			 0078  818f807c 202280fd            y: MULADD_IEEE        R1.y,  T0.x, T0.w, [0x3fc00000 1.5].x
 0080  3fc00000 												 0080  3fc00000 
0006  0000002a 80800400 TEX 2 @84										0006  0000002a 80800400 TEX 2 @84
 0084  00011210 f01d1002 0c810000 SAMPLE              R2.xyz_, R1.xywx,   RID:18, SID:2 CT:NNNN			 0084  00011210 f01d1002 0c810000 SAMPLE              R2.xyz_, R1.xywx,   RID:18, SID:2 CT:NNNN
 0088  00001010 f00d1001 fc800000 SAMPLE              R1.xyzw, R0.xy__,   RID:16, SID:0 CT:NNNN			 0088  00001010 f00d1001 fc800000 SAMPLE              R1.xyzw, R0.xy__,   RID:16, SID:0 CT:NNNN
0008  4000002e a0340000 ALU 14 @92 KC0[CB0:0-15]								0008  4000002e a0340000 ALU 14 @92 KC0[CB0:0-15]
 0092  01004801 0f800210    15      x: MUL_IEEE           T0.x,  R1.z, R2.z					 0092  01004801 0f800210    15      x: MUL_IEEE           T0.x,  R1.z, R2.z
 0094  81100801 2f800210            y: MUL_IEEE           T0.y,  R1.z, KC0[0].z					 0094  81100801 2f800210            y: MUL_IEEE           T0.y,  R1.z, KC0[0].z
 0096  0010807c 0fa2847c    16      x: MULADD_IEEE        T1.x,  T0.x, KC0[4].x, T0.y				 0096  0010807c 0fa2847c    16      x: MULADD_IEEE        T1.x,  T0.x, KC0[4].x, T0.y
 0098  00900401 2f800210            y: MUL_IEEE           T0.y,  R1.y, KC0[0].y					 0098  00900401 2f800210            y: MUL_IEEE           T0.y,  R1.y, KC0[0].y
 0100  00004001 4f880210            z: MUL_IEEE           T0.z,  R1.x, R2.x             VEC_120			 0100  00004001 4f880210            z: MUL_IEEE           T0.z,  R1.x, R2.x             VEC_120
 0102  00804401 6f800210            w: MUL_IEEE           T0.w,  R1.y, R2.y					 0102  00804401 6f800210            w: MUL_IEEE           T0.w,  R1.y, R2.y
 0104  80100001 0f840210            t: MUL_IEEE           T0.x,  R1.x, KC0[0].x         SCL_122			 0104  80100001 0f840210            t: MUL_IEEE           T0.x,  R1.x, KC0[0].x         SCL_122
 0106  0010887c 0f82807c    17      x: MULADD_IEEE        T0.x,  T0.z, KC0[4].x, T0.x				 0106  0010887c 0f82807c    17      x: MULADD_IEEE        T0.x,  T0.z, KC0[4].x, T0.x
 0108  00108c7c 2f82847c            y: MULADD_IEEE        T0.y,  T0.w, KC0[4].x, T0.y				 0108  00108c7c 2f82847c            y: MULADD_IEEE        T0.y,  T0.w, KC0[4].x, T0.y
 0110  81000c01 4f880210            z: MUL_IEEE           T0.z,  R1.w, R0.z             VEC_120			 0110  81000c01 4f880210            z: MUL_IEEE           T0.z,  R1.w, R0.z             VEC_120
 0112  0010a07c 00000210    18      x: MUL_IEEE           R0.x,  T0.x, KC0[5].x					 0112  0010a07c 00000210    18      x: MUL_IEEE           R0.x,  T0.x, KC0[5].x
 0114  0090a47c 20000210            y: MUL_IEEE           R0.y,  T0.y, KC0[5].y					 0114  0090a47c 20000210            y: MUL_IEEE           R0.y,  T0.y, KC0[5].y
 0116  0110a07d 40080210            z: MUL_IEEE           R0.z,  T1.x, KC0[5].z         VEC_120			 0116  0110a07d 40080210            z: MUL_IEEE           R0.z,  T1.x, KC0[5].z         VEC_120
 0118  8190a87c 60000210            w: MUL_IEEE           R0.w,  T0.z, KC0[5].w					 0118  8190a87c 60000210            w: MUL_IEEE           R0.w,  T0.z, KC0[5].w
0010  c0000000 94200688 EXPORT_DONE        PIXEL 0     R0.xyzw  EOP						0010  c0000000 94200688 EXPORT_DONE        PIXEL 0     R0.xyzw  EOP
===== SHADER_END ===============================================================				===== SHADER_END ===============================================================

--------------------------------------------------------------							--------------------------------------------------------------
VERT														VERT
DCL IN[0]													DCL IN[0]
DCL IN[1]													DCL IN[1]
DCL IN[2]													DCL IN[2]
DCL IN[3]													DCL IN[3]
DCL OUT[0], POSITION												DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]												DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]												DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]												DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]												DCL OUT[4], GENERIC[3]
DCL CONST[0..11]												DCL CONST[0..11]
DCL TEMP[0..6], LOCAL												DCL TEMP[0..6], LOCAL
IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}							IMM[0] FLT32 {    1.0000,     0.0000,     0.0000,     0.0000}
  0: DP4 TEMP[0].x, CONST[8], IN[0]										  0: DP4 TEMP[0].x, CONST[8], IN[0]
  1: DP4 TEMP[1].x, CONST[9], IN[0]										  1: DP4 TEMP[1].x, CONST[9], IN[0]
  2: DP4 TEMP[2].x, CONST[10], IN[0]										  2: DP4 TEMP[2].x, CONST[10], IN[0]
  3: DP3 TEMP[3].x, CONST[8].xyzz, IN[2].xyzz									  3: DP3 TEMP[3].x, CONST[8].xyzz, IN[2].xyzz
  4: DP3 TEMP[4].x, CONST[9].xyzz, IN[2].xyzz									  4: DP3 TEMP[4].x, CONST[9].xyzz, IN[2].xyzz
  5: MOV TEMP[3].y, TEMP[4].xxxx										  5: MOV TEMP[3].y, TEMP[4].xxxx
  6: DP3 TEMP[4].x, CONST[10].xyzz, IN[2].xyzz									  6: DP3 TEMP[4].x, CONST[10].xyzz, IN[2].xyzz
  7: MOV TEMP[3].z, TEMP[4].xxxx										  7: MOV TEMP[3].z, TEMP[4].xxxx
  8: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz									  8: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
  9: RSQ TEMP[4].x, TEMP[4].xxxx										  9: RSQ TEMP[4].x, TEMP[4].xxxx
 10: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx								 10: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
 11: DP3 TEMP[4].x, CONST[8].xyzz, IN[3].xyzz									 11: DP3 TEMP[4].x, CONST[8].xyzz, IN[3].xyzz
 12: DP3 TEMP[5].x, CONST[9].xyzz, IN[3].xyzz									 12: DP3 TEMP[5].x, CONST[9].xyzz, IN[3].xyzz
 13: MOV TEMP[4].y, TEMP[5].xxxx										 13: MOV TEMP[4].y, TEMP[5].xxxx
 14: DP3 TEMP[5].x, CONST[10].xyzz, IN[3].xyzz									 14: DP3 TEMP[5].x, CONST[10].xyzz, IN[3].xyzz
 15: MOV TEMP[4].z, TEMP[5].xxxx										 15: MOV TEMP[4].z, TEMP[5].xxxx
 16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz									 16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
 17: RSQ TEMP[5].x, TEMP[5].xxxx										 17: RSQ TEMP[5].x, TEMP[5].xxxx
 18: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx								 18: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
 19: MOV TEMP[5].w, IN[1].wwww											 19: MOV TEMP[5].w, IN[1].wwww
 20: MUL TEMP[6].xy, IN[1].xyyy, CONST[11].xyyy									 20: MUL TEMP[6].xy, IN[1].xyyy, CONST[11].xyyy
 21: MAD TEMP[5].xy, CONST[11].zwww, IN[1].wwww, TEMP[6].xyyy							 21: MAD TEMP[5].xy, CONST[11].zwww, IN[1].wwww, TEMP[6].xyyy
 22: MOV TEMP[5].z, IN[2].wwww											 22: MOV TEMP[5].z, IN[2].wwww
 23: MUL TEMP[0], CONST[0], TEMP[0].xxxx									 23: MUL TEMP[0], CONST[0], TEMP[0].xxxx
 24: MAD TEMP[0], CONST[1], TEMP[1].xxxx, TEMP[0]								 24: MAD TEMP[0], CONST[1], TEMP[1].xxxx, TEMP[0]
 25: MAD TEMP[0], CONST[2], TEMP[2].xxxx, TEMP[0]								 25: MAD TEMP[0], CONST[2], TEMP[2].xxxx, TEMP[0]
 26: ADD TEMP[0], TEMP[0], CONST[3]										 26: ADD TEMP[0], TEMP[0], CONST[3]
 27: MUL TEMP[1], CONST[4], TEMP[4].xxxx									 27: MUL TEMP[1], CONST[4], TEMP[4].xxxx
 28: MAD TEMP[1], CONST[5], TEMP[4].yyyy, TEMP[1]								 28: MAD TEMP[1], CONST[5], TEMP[4].yyyy, TEMP[1]
 29: MAD TEMP[1].xyz, CONST[6], TEMP[4].zzzz, TEMP[1]								 29: MAD TEMP[1].xyz, CONST[6], TEMP[4].zzzz, TEMP[1]
 30: MOV TEMP[1].xyz, TEMP[1].xyzx										 30: MOV TEMP[1].xyz, TEMP[1].xyzx
 31: MUL TEMP[6].xyz, TEMP[3].zxyy, TEMP[4].yzxx								 31: MUL TEMP[6].xyz, TEMP[3].zxyy, TEMP[4].yzxx
 32: MAD TEMP[4].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[6].xyzz							 32: MAD TEMP[4].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[6].xyzz
 33: MUL TEMP[2].xyz, TEMP[4].xyzz, IN[3].wwww									 33: MUL TEMP[2].xyz, TEMP[4].xyzz, IN[3].wwww
 34: MUL TEMP[4], CONST[4], TEMP[2].xxxx									 34: MUL TEMP[4], CONST[4], TEMP[2].xxxx
 35: MAD TEMP[4], CONST[5], TEMP[2].yyyy, TEMP[4]								 35: MAD TEMP[4], CONST[5], TEMP[2].yyyy, TEMP[4]
 36: MAD TEMP[2].xyz, CONST[6], TEMP[2].zzzz, TEMP[4]								 36: MAD TEMP[2].xyz, CONST[6], TEMP[2].zzzz, TEMP[4]
 37: MOV TEMP[2].xyz, TEMP[2].xyzx										 37: MOV TEMP[2].xyz, TEMP[2].xyzx
 38: MUL TEMP[4], CONST[4], TEMP[3].xxxx									 38: MUL TEMP[4], CONST[4], TEMP[3].xxxx
 39: MAD TEMP[4], CONST[5], TEMP[3].yyyy, TEMP[4]								 39: MAD TEMP[4], CONST[5], TEMP[3].yyyy, TEMP[4]
 40: MAD TEMP[3].xyz, CONST[6], TEMP[3].zzzz, TEMP[4]								 40: MAD TEMP[3].xyz, CONST[6], TEMP[3].zzzz, TEMP[4]
 41: MOV TEMP[3].xyz, TEMP[3].xyzx										 41: MOV TEMP[3].xyz, TEMP[3].xyzx
 42: MOV OUT[1], TEMP[5]											 42: MOV OUT[1], TEMP[5]
 43: MOV OUT[2], TEMP[1]											 43: MOV OUT[2], TEMP[1]
 44: MOV OUT[3], TEMP[2]											 44: MOV OUT[3], TEMP[2]
 45: MOV OUT[4], TEMP[3]											 45: MOV OUT[4], TEMP[3]
 46: MOV OUT[0], TEMP[0]											 46: MOV OUT[0], TEMP[0]
 47: END													 47: END
; ModuleID = 'tgsi'												; ModuleID = 'tgsi'

define void @main() #0 {										     |	define void @main(<4 x float> inreg, <4 x float> inreg, <4 x float> inreg, <4 x float> inreg, <4 x float> in
main_body:													main_body:
  %0 = call float @llvm.R600.load.input(i32 4)								     |	  %5 = extractelement <4 x float> %1, i32 0
  %1 = call float @llvm.R600.load.input(i32 5)								     |	  %6 = extractelement <4 x float> %1, i32 1
  %2 = call float @llvm.R600.load.input(i32 6)								     |	  %7 = extractelement <4 x float> %1, i32 2
  %3 = call float @llvm.R600.load.input(i32 7)								     |	  %8 = extractelement <4 x float> %1, i32 3
  %4 = call float @llvm.R600.load.input(i32 8)								     |	  %9 = extractelement <4 x float> %2, i32 0
  %5 = call float @llvm.R600.load.input(i32 9)								     |	  %10 = extractelement <4 x float> %2, i32 1
  %6 = call float @llvm.R600.load.input(i32 10)								     |	  %11 = extractelement <4 x float> %2, i32 3
  %7 = call float @llvm.R600.load.input(i32 11)								     |	  %12 = extractelement <4 x float> %3, i32 0
  %8 = call float @llvm.R600.load.input(i32 12)								     |	  %13 = extractelement <4 x float> %3, i32 1
  %9 = call float @llvm.R600.load.input(i32 13)								     |	  %14 = extractelement <4 x float> %3, i32 2
  %10 = call float @llvm.R600.load.input(i32 14)							     |	  %15 = extractelement <4 x float> %3, i32 3
  %11 = call float @llvm.R600.load.input(i32 15)							     |	  %16 = extractelement <4 x float> %4, i32 0
  %12 = call float @llvm.R600.load.input(i32 16)							     |	  %17 = extractelement <4 x float> %4, i32 1
  %13 = call float @llvm.R600.load.input(i32 17)							     |	  %18 = extractelement <4 x float> %4, i32 2
  %14 = call float @llvm.R600.load.input(i32 18)							     |	  %19 = extractelement <4 x float> %4, i32 3
  %15 = call float @llvm.R600.load.input(i32 19)							     <
  %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) <
  %17 = extractelement <4 x float> %16, i32 0								     <
  %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) <
  %19 = extractelement <4 x float> %18, i32 1								     <
  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)	  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %21 = extractelement <4 x float> %20, i32 2								     |	  %21 = extractelement <4 x float> %20, i32 0
  %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)	  %22 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %23 = extractelement <4 x float> %22, i32 3								     |	  %23 = extractelement <4 x float> %22, i32 1
  %24 = insertelement <4 x float> undef, float %17, i32 0						     |	  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %25 = insertelement <4 x float> %24, float %19, i32 1							     |	  %25 = extractelement <4 x float> %24, i32 2
  %26 = insertelement <4 x float> %25, float %21, i32 2							     |	  %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %27 = insertelement <4 x float> %26, float %23, i32 3							     |	  %27 = extractelement <4 x float> %26, i32 3
  %28 = insertelement <4 x float> undef, float %0, i32 0						     |	  %28 = insertelement <4 x float> undef, float %21, i32 0
  %29 = insertelement <4 x float> %28, float %1, i32 1							     |	  %29 = insertelement <4 x float> %28, float %23, i32 1
  %30 = insertelement <4 x float> %29, float %2, i32 2							     |	  %30 = insertelement <4 x float> %29, float %25, i32 2
  %31 = insertelement <4 x float> %30, float %3, i32 3							     |	  %31 = insertelement <4 x float> %30, float %27, i32 3
  %32 = call float @llvm.AMDGPU.dp4(<4 x float> %27, <4 x float> %31)					     |	  %32 = insertelement <4 x float> undef, float %5, i32 0
  %33 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) |	  %33 = insertelement <4 x float> %32, float %6, i32 1
  %34 = extractelement <4 x float> %33, i32 0								     |	  %34 = insertelement <4 x float> %33, float %7, i32 2
  %35 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) |	  %35 = insertelement <4 x float> %34, float %8, i32 3
  %36 = extractelement <4 x float> %35, i32 1								     |	  %36 = call float @llvm.AMDGPU.dp4(<4 x float> %31, <4 x float> %35)
  %37 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)	  %37 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %38 = extractelement <4 x float> %37, i32 2								     |	  %38 = extractelement <4 x float> %37, i32 0
  %39 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)	  %39 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %40 = extractelement <4 x float> %39, i32 3								     |	  %40 = extractelement <4 x float> %39, i32 1
  %41 = insertelement <4 x float> undef, float %34, i32 0						     |	  %41 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %42 = insertelement <4 x float> %41, float %36, i32 1							     |	  %42 = extractelement <4 x float> %41, i32 2
  %43 = insertelement <4 x float> %42, float %38, i32 2							     |	  %43 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %44 = insertelement <4 x float> %43, float %40, i32 3							     |	  %44 = extractelement <4 x float> %43, i32 3
  %45 = insertelement <4 x float> undef, float %0, i32 0						     |	  %45 = insertelement <4 x float> undef, float %38, i32 0
  %46 = insertelement <4 x float> %45, float %1, i32 1							     |	  %46 = insertelement <4 x float> %45, float %40, i32 1
  %47 = insertelement <4 x float> %46, float %2, i32 2							     |	  %47 = insertelement <4 x float> %46, float %42, i32 2
  %48 = insertelement <4 x float> %47, float %3, i32 3							     |	  %48 = insertelement <4 x float> %47, float %44, i32 3
  %49 = call float @llvm.AMDGPU.dp4(<4 x float> %44, <4 x float> %48)					     |	  %49 = insertelement <4 x float> undef, float %5, i32 0
  %50 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 |	  %50 = insertelement <4 x float> %49, float %6, i32 1
  %51 = extractelement <4 x float> %50, i32 0								     |	  %51 = insertelement <4 x float> %50, float %7, i32 2
  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 |	  %52 = insertelement <4 x float> %51, float %8, i32 3
  %53 = extractelement <4 x float> %52, i32 1								     |	  %53 = call float @llvm.AMDGPU.dp4(<4 x float> %48, <4 x float> %52)
  %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10	  %54 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10
  %55 = extractelement <4 x float> %54, i32 2								     |	  %55 = extractelement <4 x float> %54, i32 0
  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10	  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10
  %57 = extractelement <4 x float> %56, i32 3								     |	  %57 = extractelement <4 x float> %56, i32 1
  %58 = insertelement <4 x float> undef, float %51, i32 0						     |	  %58 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10
  %59 = insertelement <4 x float> %58, float %53, i32 1							     |	  %59 = extractelement <4 x float> %58, i32 2
  %60 = insertelement <4 x float> %59, float %55, i32 2							     |	  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10
  %61 = insertelement <4 x float> %60, float %57, i32 3							     |	  %61 = extractelement <4 x float> %60, i32 3
  %62 = insertelement <4 x float> undef, float %0, i32 0						     |	  %62 = insertelement <4 x float> undef, float %55, i32 0
  %63 = insertelement <4 x float> %62, float %1, i32 1							     |	  %63 = insertelement <4 x float> %62, float %57, i32 1
  %64 = insertelement <4 x float> %63, float %2, i32 2							     |	  %64 = insertelement <4 x float> %63, float %59, i32 2
  %65 = insertelement <4 x float> %64, float %3, i32 3							     |	  %65 = insertelement <4 x float> %64, float %61, i32 3
  %66 = call float @llvm.AMDGPU.dp4(<4 x float> %61, <4 x float> %65)					     |	  %66 = insertelement <4 x float> undef, float %5, i32 0
  %67 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) |	  %67 = insertelement <4 x float> %66, float %6, i32 1
  %68 = extractelement <4 x float> %67, i32 0								     |	  %68 = insertelement <4 x float> %67, float %7, i32 2
  %69 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) |	  %69 = insertelement <4 x float> %68, float %8, i32 3
  %70 = extractelement <4 x float> %69, i32 1								     |	  %70 = call float @llvm.AMDGPU.dp4(<4 x float> %65, <4 x float> %69)
  %71 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)	  %71 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %72 = extractelement <4 x float> %71, i32 2								     |	  %72 = extractelement <4 x float> %71, i32 0
  %73 = insertelement <4 x float> undef, float %68, i32 0						     |	  %73 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %74 = insertelement <4 x float> %73, float %70, i32 1							     |	  %74 = extractelement <4 x float> %73, i32 1
  %75 = insertelement <4 x float> %74, float %72, i32 2							     |	  %75 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %76 = insertelement <4 x float> %75, float 0.000000e+00, i32 3					     |	  %76 = extractelement <4 x float> %75, i32 2
  %77 = insertelement <4 x float> undef, float %8, i32 0						     |	  %77 = insertelement <4 x float> undef, float %72, i32 0
  %78 = insertelement <4 x float> %77, float %9, i32 1							     |	  %78 = insertelement <4 x float> %77, float %74, i32 1
  %79 = insertelement <4 x float> %78, float %10, i32 2							     |	  %79 = insertelement <4 x float> %78, float %76, i32 2
  %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3						  %80 = insertelement <4 x float> %79, float 0.000000e+00, i32 3
  %81 = call float @llvm.AMDGPU.dp4(<4 x float> %76, <4 x float> %80)					     |	  %81 = insertelement <4 x float> undef, float %12, i32 0
  %82 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) |	  %82 = insertelement <4 x float> %81, float %13, i32 1
  %83 = extractelement <4 x float> %82, i32 0								     |	  %83 = insertelement <4 x float> %82, float %14, i32 2
  %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) |	  %84 = insertelement <4 x float> %83, float 0.000000e+00, i32 3
  %85 = extractelement <4 x float> %84, i32 1								     |	  %85 = call float @llvm.AMDGPU.dp4(<4 x float> %80, <4 x float> %84)
  %86 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)	  %86 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %87 = extractelement <4 x float> %86, i32 2								     |	  %87 = extractelement <4 x float> %86, i32 0
  %88 = insertelement <4 x float> undef, float %83, i32 0						     |	  %88 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %89 = insertelement <4 x float> %88, float %85, i32 1							     |	  %89 = extractelement <4 x float> %88, i32 1
  %90 = insertelement <4 x float> %89, float %87, i32 2							     |	  %90 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3					     |	  %91 = extractelement <4 x float> %90, i32 2
  %92 = insertelement <4 x float> undef, float %8, i32 0						     |	  %92 = insertelement <4 x float> undef, float %87, i32 0
  %93 = insertelement <4 x float> %92, float %9, i32 1							     |	  %93 = insertelement <4 x float> %92, float %89, i32 1
  %94 = insertelement <4 x float> %93, float %10, i32 2							     |	  %94 = insertelement <4 x float> %93, float %91, i32 2
  %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3						  %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
  %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)					     |	  %96 = insertelement <4 x float> undef, float %12, i32 0
  %97 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 |	  %97 = insertelement <4 x float> %96, float %13, i32 1
  %98 = extractelement <4 x float> %97, i32 0								     |	  %98 = insertelement <4 x float> %97, float %14, i32 2
  %99 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10 |	  %99 = insertelement <4 x float> %98, float 0.000000e+00, i32 3
  %100 = extractelement <4 x float> %99, i32 1								     |	  %100 = call float @llvm.AMDGPU.dp4(<4 x float> %95, <4 x float> %99)
  %101 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1	  %101 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %102 = extractelement <4 x float> %101, i32 2								     |	  %102 = extractelement <4 x float> %101, i32 0
  %103 = insertelement <4 x float> undef, float %98, i32 0						     |	  %103 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %104 = insertelement <4 x float> %103, float %100, i32 1						     |	  %104 = extractelement <4 x float> %103, i32 1
  %105 = insertelement <4 x float> %104, float %102, i32 2						     |	  %105 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %106 = insertelement <4 x float> %105, float 0.000000e+00, i32 3					     |	  %106 = extractelement <4 x float> %105, i32 2
  %107 = insertelement <4 x float> undef, float %8, i32 0						     |	  %107 = insertelement <4 x float> undef, float %102, i32 0
  %108 = insertelement <4 x float> %107, float %9, i32 1						     |	  %108 = insertelement <4 x float> %107, float %104, i32 1
  %109 = insertelement <4 x float> %108, float %10, i32 2						     |	  %109 = insertelement <4 x float> %108, float %106, i32 2
  %110 = insertelement <4 x float> %109, float 0.000000e+00, i32 3						  %110 = insertelement <4 x float> %109, float 0.000000e+00, i32 3
  %111 = call float @llvm.AMDGPU.dp4(<4 x float> %106, <4 x float> %110)				     |	  %111 = insertelement <4 x float> undef, float %12, i32 0
  %112 = insertelement <4 x float> undef, float %81, i32 0						     |	  %112 = insertelement <4 x float> %111, float %13, i32 1
  %113 = insertelement <4 x float> %112, float %96, i32 1						     |	  %113 = insertelement <4 x float> %112, float %14, i32 2
  %114 = insertelement <4 x float> %113, float %111, i32 2						     |	  %114 = insertelement <4 x float> %113, float 0.000000e+00, i32 3
  %115 = insertelement <4 x float> %114, float 0.000000e+00, i32 3					     |	  %115 = call float @llvm.AMDGPU.dp4(<4 x float> %110, <4 x float> %114)
  %116 = insertelement <4 x float> undef, float %81, i32 0						     |	  %116 = insertelement <4 x float> undef, float %85, i32 0
  %117 = insertelement <4 x float> %116, float %96, i32 1						     |	  %117 = insertelement <4 x float> %116, float %100, i32 1
  %118 = insertelement <4 x float> %117, float %111, i32 2						     |	  %118 = insertelement <4 x float> %117, float %115, i32 2
  %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3						  %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3
  %120 = call float @llvm.AMDGPU.dp4(<4 x float> %115, <4 x float> %119)				     |	  %120 = insertelement <4 x float> undef, float %85, i32 0
  %121 = call float @llvm.AMDGPU.rsq(float %120)							     |	  %121 = insertelement <4 x float> %120, float %100, i32 1
  %122 = fmul float %81, %121										     |	  %122 = insertelement <4 x float> %121, float %115, i32 2
  %123 = fmul float %96, %121										     |	  %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3
  %124 = fmul float %111, %121										     |	  %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123)
  %125 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8 |	  %125 = call float @llvm.AMDGPU.rsq(float %124)
  %126 = extractelement <4 x float> %125, i32 0								     |	  %126 = fmul float %85, %125
  %127 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8 |	  %127 = fmul float %100, %125
  %128 = extractelement <4 x float> %127, i32 1								     |	  %128 = fmul float %115, %125
  %129 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8	  %129 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8
  %130 = extractelement <4 x float> %129, i32 2								     |	  %130 = extractelement <4 x float> %129, i32 0
  %131 = insertelement <4 x float> undef, float %126, i32 0						     |	  %131 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8
  %132 = insertelement <4 x float> %131, float %128, i32 1						     |	  %132 = extractelement <4 x float> %131, i32 1
  %133 = insertelement <4 x float> %132, float %130, i32 2						     |	  %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8
  %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 3					     |	  %134 = extractelement <4 x float> %133, i32 2
  %135 = insertelement <4 x float> undef, float %12, i32 0						     |	  %135 = insertelement <4 x float> undef, float %130, i32 0
  %136 = insertelement <4 x float> %135, float %13, i32 1						     |	  %136 = insertelement <4 x float> %135, float %132, i32 1
  %137 = insertelement <4 x float> %136, float %14, i32 2						     |	  %137 = insertelement <4 x float> %136, float %134, i32 2
  %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3						  %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3
  %139 = call float @llvm.AMDGPU.dp4(<4 x float> %134, <4 x float> %138)				     |	  %139 = insertelement <4 x float> undef, float %16, i32 0
  %140 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9 |	  %140 = insertelement <4 x float> %139, float %17, i32 1
  %141 = extractelement <4 x float> %140, i32 0								     |	  %141 = insertelement <4 x float> %140, float %18, i32 2
  %142 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9 |	  %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 3
  %143 = extractelement <4 x float> %142, i32 1								     |	  %143 = call float @llvm.AMDGPU.dp4(<4 x float> %138, <4 x float> %142)
  %144 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9	  %144 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9
  %145 = extractelement <4 x float> %144, i32 2								     |	  %145 = extractelement <4 x float> %144, i32 0
  %146 = insertelement <4 x float> undef, float %141, i32 0						     |	  %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9
  %147 = insertelement <4 x float> %146, float %143, i32 1						     |	  %147 = extractelement <4 x float> %146, i32 1
  %148 = insertelement <4 x float> %147, float %145, i32 2						     |	  %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9
  %149 = insertelement <4 x float> %148, float 0.000000e+00, i32 3					     |	  %149 = extractelement <4 x float> %148, i32 2
  %150 = insertelement <4 x float> undef, float %12, i32 0						     |	  %150 = insertelement <4 x float> undef, float %145, i32 0
  %151 = insertelement <4 x float> %150, float %13, i32 1						     |	  %151 = insertelement <4 x float> %150, float %147, i32 1
  %152 = insertelement <4 x float> %151, float %14, i32 2						     |	  %152 = insertelement <4 x float> %151, float %149, i32 2
  %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 3						  %153 = insertelement <4 x float> %152, float 0.000000e+00, i32 3
  %154 = call float @llvm.AMDGPU.dp4(<4 x float> %149, <4 x float> %153)				     |	  %154 = insertelement <4 x float> undef, float %16, i32 0
  %155 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 |	  %155 = insertelement <4 x float> %154, float %17, i32 1
  %156 = extractelement <4 x float> %155, i32 0								     |	  %156 = insertelement <4 x float> %155, float %18, i32 2
  %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 |	  %157 = insertelement <4 x float> %156, float 0.000000e+00, i32 3
  %158 = extractelement <4 x float> %157, i32 1								     |	  %158 = call float @llvm.AMDGPU.dp4(<4 x float> %153, <4 x float> %157)
  %159 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1	  %159 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %160 = extractelement <4 x float> %159, i32 2								     |	  %160 = extractelement <4 x float> %159, i32 0
  %161 = insertelement <4 x float> undef, float %156, i32 0						     |	  %161 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %162 = insertelement <4 x float> %161, float %158, i32 1						     |	  %162 = extractelement <4 x float> %161, i32 1
  %163 = insertelement <4 x float> %162, float %160, i32 2						     |	  %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %164 = insertelement <4 x float> %163, float 0.000000e+00, i32 3					     |	  %164 = extractelement <4 x float> %163, i32 2
  %165 = insertelement <4 x float> undef, float %12, i32 0						     |	  %165 = insertelement <4 x float> undef, float %160, i32 0
  %166 = insertelement <4 x float> %165, float %13, i32 1						     |	  %166 = insertelement <4 x float> %165, float %162, i32 1
  %167 = insertelement <4 x float> %166, float %14, i32 2						     |	  %167 = insertelement <4 x float> %166, float %164, i32 2
  %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 3						  %168 = insertelement <4 x float> %167, float 0.000000e+00, i32 3
  %169 = call float @llvm.AMDGPU.dp4(<4 x float> %164, <4 x float> %168)				     |	  %169 = insertelement <4 x float> undef, float %16, i32 0
  %170 = insertelement <4 x float> undef, float %139, i32 0						     |	  %170 = insertelement <4 x float> %169, float %17, i32 1
  %171 = insertelement <4 x float> %170, float %154, i32 1						     |	  %171 = insertelement <4 x float> %170, float %18, i32 2
  %172 = insertelement <4 x float> %171, float %169, i32 2						     |	  %172 = insertelement <4 x float> %171, float 0.000000e+00, i32 3
  %173 = insertelement <4 x float> %172, float 0.000000e+00, i32 3					     |	  %173 = call float @llvm.AMDGPU.dp4(<4 x float> %168, <4 x float> %172)
  %174 = insertelement <4 x float> undef, float %139, i32 0						     |	  %174 = insertelement <4 x float> undef, float %143, i32 0
  %175 = insertelement <4 x float> %174, float %154, i32 1						     |	  %175 = insertelement <4 x float> %174, float %158, i32 1
  %176 = insertelement <4 x float> %175, float %169, i32 2						     |	  %176 = insertelement <4 x float> %175, float %173, i32 2
  %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 3						  %177 = insertelement <4 x float> %176, float 0.000000e+00, i32 3
  %178 = call float @llvm.AMDGPU.dp4(<4 x float> %173, <4 x float> %177)				     |	  %178 = insertelement <4 x float> undef, float %143, i32 0
  %179 = call float @llvm.AMDGPU.rsq(float %178)							     |	  %179 = insertelement <4 x float> %178, float %158, i32 1
  %180 = fmul float %139, %179										     |	  %180 = insertelement <4 x float> %179, float %173, i32 2
  %181 = fmul float %154, %179										     |	  %181 = insertelement <4 x float> %180, float 0.000000e+00, i32 3
  %182 = fmul float %169, %179										     |	  %182 = call float @llvm.AMDGPU.dp4(<4 x float> %177, <4 x float> %181)
  %183 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 |	  %183 = call float @llvm.AMDGPU.rsq(float %182)
  %184 = extractelement <4 x float> %183, i32 0								     |	  %184 = fmul float %143, %183
  %185 = fmul float %4, %184										     |	  %185 = fmul float %158, %183
  %186 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 |	  %186 = fmul float %173, %183
  %187 = extractelement <4 x float> %186, i32 1								     |	  %187 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %188 = fmul float %5, %187										     |	  %188 = extractelement <4 x float> %187, i32 0
  %189 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 |	  %189 = fmul float %9, %188
  %190 = extractelement <4 x float> %189, i32 2								     |	  %190 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %191 = fmul float %190, %7										     |	  %191 = extractelement <4 x float> %190, i32 1
  %192 = fadd float %191, %185										     |	  %192 = fmul float %10, %191
  %193 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1	  %193 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %194 = extractelement <4 x float> %193, i32 3								     |	  %194 = extractelement <4 x float> %193, i32 2
  %195 = fmul float %194, %7										     |	  %195 = fmul float %194, %11
  %196 = fadd float %195, %188										     |	  %196 = fadd float %195, %189
  %197 = load <4 x float> addrspace(8)* null								     |	  %197 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %198 = extractelement <4 x float> %197, i32 0								     |	  %198 = extractelement <4 x float> %197, i32 3
  %199 = fmul float %198, %32										     |	  %199 = fmul float %198, %11
  %200 = load <4 x float> addrspace(8)* null								     |	  %200 = fadd float %199, %192
  %201 = extractelement <4 x float> %200, i32 1								     |	  %201 = load <4 x float> addrspace(8)* null
  %202 = fmul float %201, %32										     |	  %202 = extractelement <4 x float> %201, i32 0
  %203 = load <4 x float> addrspace(8)* null								     |	  %203 = fmul float %202, %36
  %204 = extractelement <4 x float> %203, i32 2								     |	  %204 = load <4 x float> addrspace(8)* null
  %205 = fmul float %204, %32										     |	  %205 = extractelement <4 x float> %204, i32 1
  %206 = load <4 x float> addrspace(8)* null								     |	  %206 = fmul float %205, %36
  %207 = extractelement <4 x float> %206, i32 3								     |	  %207 = load <4 x float> addrspace(8)* null
  %208 = fmul float %207, %32										     |	  %208 = extractelement <4 x float> %207, i32 2
  %209 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1 |	  %209 = fmul float %208, %36
  %210 = extractelement <4 x float> %209, i32 0								     |	  %210 = load <4 x float> addrspace(8)* null
  %211 = fmul float %210, %49										     |	  %211 = extractelement <4 x float> %210, i32 3
  %212 = fadd float %211, %199										     |	  %212 = fmul float %211, %36
  %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1	  %213 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %214 = extractelement <4 x float> %213, i32 1								     |	  %214 = extractelement <4 x float> %213, i32 0
  %215 = fmul float %214, %49										     |	  %215 = fmul float %214, %53
  %216 = fadd float %215, %202										     |	  %216 = fadd float %215, %203
  %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1	  %217 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %218 = extractelement <4 x float> %217, i32 2								     |	  %218 = extractelement <4 x float> %217, i32 1
  %219 = fmul float %218, %49										     |	  %219 = fmul float %218, %53
  %220 = fadd float %219, %205										     |	  %220 = fadd float %219, %206
  %221 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1	  %221 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %222 = extractelement <4 x float> %221, i32 3								     |	  %222 = extractelement <4 x float> %221, i32 2
  %223 = fmul float %222, %49										     |	  %223 = fmul float %222, %53
  %224 = fadd float %223, %208										     |	  %224 = fadd float %223, %209
  %225 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2 |	  %225 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1
  %226 = extractelement <4 x float> %225, i32 0								     |	  %226 = extractelement <4 x float> %225, i32 3
  %227 = fmul float %226, %66										     |	  %227 = fmul float %226, %53
  %228 = fadd float %227, %212											  %228 = fadd float %227, %212
  %229 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2	  %229 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2
  %230 = extractelement <4 x float> %229, i32 1								     |	  %230 = extractelement <4 x float> %229, i32 0
  %231 = fmul float %230, %66										     |	  %231 = fmul float %230, %70
  %232 = fadd float %231, %216											  %232 = fadd float %231, %216
  %233 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2	  %233 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2
  %234 = extractelement <4 x float> %233, i32 2								     |	  %234 = extractelement <4 x float> %233, i32 1
  %235 = fmul float %234, %66										     |	  %235 = fmul float %234, %70
  %236 = fadd float %235, %220											  %236 = fadd float %235, %220
  %237 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2	  %237 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2
  %238 = extractelement <4 x float> %237, i32 3								     |	  %238 = extractelement <4 x float> %237, i32 2
  %239 = fmul float %238, %66										     |	  %239 = fmul float %238, %70
  %240 = fadd float %239, %224											  %240 = fadd float %239, %224
  %241 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 |	  %241 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2
  %242 = extractelement <4 x float> %241, i32 0								     |	  %242 = extractelement <4 x float> %241, i32 3
  %243 = fadd float %228, %242										     |	  %243 = fmul float %242, %70
  %244 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 |	  %244 = fadd float %243, %228
  %245 = extractelement <4 x float> %244, i32 1								     |	  %245 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3
  %246 = fadd float %232, %245										     |	  %246 = extractelement <4 x float> %245, i32 0
  %247 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 |	  %247 = fadd float %232, %246
  %248 = extractelement <4 x float> %247, i32 2								     |	  %248 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3
  %249 = fadd float %236, %248										     |	  %249 = extractelement <4 x float> %248, i32 1
  %250 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3 |	  %250 = fadd float %236, %249
  %251 = extractelement <4 x float> %250, i32 3								     |	  %251 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3
  %252 = fadd float %240, %251										     |	  %252 = extractelement <4 x float> %251, i32 2
  %253 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %253 = fadd float %240, %252
  %254 = extractelement <4 x float> %253, i32 0								     |	  %254 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3
  %255 = fmul float %254, %180										     |	  %255 = extractelement <4 x float> %254, i32 3
  %256 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %256 = fadd float %244, %255
  %257 = extractelement <4 x float> %256, i32 1								     |	  %257 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %258 = fmul float %257, %180										     |	  %258 = extractelement <4 x float> %257, i32 0
  %259 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %259 = fmul float %258, %184
  %260 = extractelement <4 x float> %259, i32 2								     |	  %260 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %261 = fmul float %260, %180										     |	  %261 = extractelement <4 x float> %260, i32 1
  %262 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %262 = fmul float %261, %184
  %263 = extractelement <4 x float> %262, i32 3								     |	  %263 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %264 = fmul float %263, %180										     |	  %264 = extractelement <4 x float> %263, i32 2
  %265 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 |	  %265 = fmul float %264, %184
  %266 = extractelement <4 x float> %265, i32 0								     |	  %266 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %267 = fmul float %266, %181										     |	  %267 = extractelement <4 x float> %266, i32 3
  %268 = fadd float %267, %255										     |	  %268 = fmul float %267, %184
  %269 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5	  %269 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %270 = extractelement <4 x float> %269, i32 1								     |	  %270 = extractelement <4 x float> %269, i32 0
  %271 = fmul float %270, %181										     |	  %271 = fmul float %270, %185
  %272 = fadd float %271, %258										     |	  %272 = fadd float %271, %259
  %273 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5	  %273 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %274 = extractelement <4 x float> %273, i32 2								     |	  %274 = extractelement <4 x float> %273, i32 1
  %275 = fmul float %274, %181										     |	  %275 = fmul float %274, %185
  %276 = fadd float %275, %261										     |	  %276 = fadd float %275, %262
  %277 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5	  %277 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %278 = extractelement <4 x float> %277, i32 3								     |	  %278 = extractelement <4 x float> %277, i32 2
  %279 = fmul float %278, %181										     |	  %279 = fmul float %278, %185
  %280 = fadd float %279, %264										     |	  %280 = fadd float %279, %265
  %281 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 |	  %281 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %282 = extractelement <4 x float> %281, i32 0								     |	  %282 = extractelement <4 x float> %281, i32 3
  %283 = fmul float %282, %182										     |	  %283 = fmul float %282, %185
  %284 = fadd float %283, %268											  %284 = fadd float %283, %268
  %285 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6	  %285 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %286 = extractelement <4 x float> %285, i32 1								     |	  %286 = extractelement <4 x float> %285, i32 0
  %287 = fmul float %286, %182										     |	  %287 = fmul float %286, %186
  %288 = fadd float %287, %272											  %288 = fadd float %287, %272
  %289 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6	  %289 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %290 = extractelement <4 x float> %289, i32 2								     |	  %290 = extractelement <4 x float> %289, i32 1
  %291 = fmul float %290, %182										     |	  %291 = fmul float %290, %186
  %292 = fadd float %291, %276											  %292 = fadd float %291, %276
  %293 = fmul float %124, %181										     |	  %293 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %294 = fmul float %122, %182										     |	  %294 = extractelement <4 x float> %293, i32 2
  %295 = fmul float %123, %180										     |	  %295 = fmul float %294, %186
  %296 = fsub float -0.000000e+00, %293									     |	  %296 = fadd float %295, %280
  %297 = fmul float %123, %182										     |	  %297 = fmul float %128, %185
  %298 = fadd float %297, %296										     |	  %298 = fmul float %126, %186
  %299 = fsub float -0.000000e+00, %294									     |	  %299 = fmul float %127, %184
  %300 = fmul float %124, %180										     |	  %300 = fsub float -0.000000e+00, %297
  %301 = fadd float %300, %299										     |	  %301 = fmul float %127, %186
  %302 = fsub float -0.000000e+00, %295									     |	  %302 = fadd float %301, %300
  %303 = fmul float %122, %181										     |	  %303 = fsub float -0.000000e+00, %298
  %304 = fadd float %303, %302										     |	  %304 = fmul float %128, %184
  %305 = fmul float %298, %15										     |	  %305 = fadd float %304, %303
  %306 = fmul float %301, %15										     |	  %306 = fsub float -0.000000e+00, %299
  %307 = fmul float %304, %15										     |	  %307 = fmul float %126, %185
  %308 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %308 = fadd float %307, %306
  %309 = extractelement <4 x float> %308, i32 0								     |	  %309 = fmul float %302, %19
  %310 = fmul float %309, %305										     |	  %310 = fmul float %305, %19
  %311 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %311 = fmul float %308, %19
  %312 = extractelement <4 x float> %311, i32 1								     |	  %312 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %313 = fmul float %312, %305										     |	  %313 = extractelement <4 x float> %312, i32 0
  %314 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %314 = fmul float %313, %309
  %315 = extractelement <4 x float> %314, i32 2								     |	  %315 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %316 = fmul float %315, %305										     |	  %316 = extractelement <4 x float> %315, i32 1
  %317 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 |	  %317 = fmul float %316, %309
  %318 = extractelement <4 x float> %317, i32 0								     |	  %318 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %319 = fmul float %318, %306										     |	  %319 = extractelement <4 x float> %318, i32 2
  %320 = fadd float %319, %310										     |	  %320 = fmul float %319, %309
  %321 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5	  %321 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %322 = extractelement <4 x float> %321, i32 1								     |	  %322 = extractelement <4 x float> %321, i32 0
  %323 = fmul float %322, %306										     |	  %323 = fmul float %322, %310
  %324 = fadd float %323, %313										     |	  %324 = fadd float %323, %314
  %325 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5	  %325 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %326 = extractelement <4 x float> %325, i32 2								     |	  %326 = extractelement <4 x float> %325, i32 1
  %327 = fmul float %326, %306										     |	  %327 = fmul float %326, %310
  %328 = fadd float %327, %316										     |	  %328 = fadd float %327, %317
  %329 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 |	  %329 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %330 = extractelement <4 x float> %329, i32 0								     |	  %330 = extractelement <4 x float> %329, i32 2
  %331 = fmul float %330, %307										     |	  %331 = fmul float %330, %310
  %332 = fadd float %331, %320											  %332 = fadd float %331, %320
  %333 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6	  %333 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %334 = extractelement <4 x float> %333, i32 1								     |	  %334 = extractelement <4 x float> %333, i32 0
  %335 = fmul float %334, %307										     |	  %335 = fmul float %334, %311
  %336 = fadd float %335, %324											  %336 = fadd float %335, %324
  %337 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6	  %337 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %338 = extractelement <4 x float> %337, i32 2								     |	  %338 = extractelement <4 x float> %337, i32 1
  %339 = fmul float %338, %307										     |	  %339 = fmul float %338, %311
  %340 = fadd float %339, %328											  %340 = fadd float %339, %328
  %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %341 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %342 = extractelement <4 x float> %341, i32 0								     |	  %342 = extractelement <4 x float> %341, i32 2
  %343 = fmul float %342, %122										     |	  %343 = fmul float %342, %311
  %344 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %344 = fadd float %343, %332
  %345 = extractelement <4 x float> %344, i32 1								     |	  %345 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %346 = fmul float %345, %122										     |	  %346 = extractelement <4 x float> %345, i32 0
  %347 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4 |	  %347 = fmul float %346, %126
  %348 = extractelement <4 x float> %347, i32 2								     |	  %348 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %349 = fmul float %348, %122										     |	  %349 = extractelement <4 x float> %348, i32 1
  %350 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5 |	  %350 = fmul float %349, %126
  %351 = extractelement <4 x float> %350, i32 0								     |	  %351 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4
  %352 = fmul float %351, %123										     |	  %352 = extractelement <4 x float> %351, i32 2
  %353 = fadd float %352, %343										     |	  %353 = fmul float %352, %126
  %354 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5	  %354 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %355 = extractelement <4 x float> %354, i32 1								     |	  %355 = extractelement <4 x float> %354, i32 0
  %356 = fmul float %355, %123										     |	  %356 = fmul float %355, %127
  %357 = fadd float %356, %346										     |	  %357 = fadd float %356, %347
  %358 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5	  %358 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %359 = extractelement <4 x float> %358, i32 2								     |	  %359 = extractelement <4 x float> %358, i32 1
  %360 = fmul float %359, %123										     |	  %360 = fmul float %359, %127
  %361 = fadd float %360, %349										     |	  %361 = fadd float %360, %350
  %362 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6 |	  %362 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5
  %363 = extractelement <4 x float> %362, i32 0								     |	  %363 = extractelement <4 x float> %362, i32 2
  %364 = fmul float %363, %124										     |	  %364 = fmul float %363, %127
  %365 = fadd float %364, %353											  %365 = fadd float %364, %353
  %366 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6	  %366 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %367 = extractelement <4 x float> %366, i32 1								     |	  %367 = extractelement <4 x float> %366, i32 0
  %368 = fmul float %367, %124										     |	  %368 = fmul float %367, %128
  %369 = fadd float %368, %357											  %369 = fadd float %368, %357
  %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6	  %370 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %371 = extractelement <4 x float> %370, i32 2								     |	  %371 = extractelement <4 x float> %370, i32 1
  %372 = fmul float %371, %124										     |	  %372 = fmul float %371, %128
  %373 = fadd float %372, %361											  %373 = fadd float %372, %361
  %374 = insertelement <4 x float> undef, float %243, i32 0						     |	  %374 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6
  %375 = insertelement <4 x float> %374, float %246, i32 1						     |	  %375 = extractelement <4 x float> %374, i32 2
  %376 = insertelement <4 x float> %375, float %249, i32 2						     |	  %376 = fmul float %375, %128
  %377 = insertelement <4 x float> %376, float %252, i32 3						     |	  %377 = fadd float %376, %365
  call void @llvm.R600.store.swizzle(<4 x float> %377, i32 60, i32 1)					     |	  %378 = insertelement <4 x float> undef, float %247, i32 0
  %378 = insertelement <4 x float> undef, float %192, i32 0						     |	  %379 = insertelement <4 x float> %378, float %250, i32 1
  %379 = insertelement <4 x float> %378, float %196, i32 1						     |	  %380 = insertelement <4 x float> %379, float %253, i32 2
  %380 = insertelement <4 x float> %379, float %11, i32 2						     |	  %381 = insertelement <4 x float> %380, float %256, i32 3
  %381 = insertelement <4 x float> %380, float %7, i32 3						     |	  call void @llvm.R600.store.swizzle(<4 x float> %381, i32 60, i32 1)
  call void @llvm.R600.store.swizzle(<4 x float> %381, i32 0, i32 2)					     |	  %382 = insertelement <4 x float> undef, float %196, i32 0
  %382 = insertelement <4 x float> undef, float %284, i32 0						     |	  %383 = insertelement <4 x float> %382, float %200, i32 1
  %383 = insertelement <4 x float> %382, float %288, i32 1						     |	  %384 = insertelement <4 x float> %383, float %15, i32 2
  %384 = insertelement <4 x float> %383, float %292, i32 2						     |	  %385 = insertelement <4 x float> %384, float %11, i32 3
  %385 = insertelement <4 x float> %384, float %280, i32 3						     |	  call void @llvm.R600.store.swizzle(<4 x float> %385, i32 0, i32 2)
  call void @llvm.R600.store.swizzle(<4 x float> %385, i32 1, i32 2)					     |	  %386 = insertelement <4 x float> undef, float %288, i32 0
  %386 = insertelement <4 x float> undef, float %332, i32 0						     |	  %387 = insertelement <4 x float> %386, float %292, i32 1
  %387 = insertelement <4 x float> %386, float %336, i32 1						     |	  %388 = insertelement <4 x float> %387, float %296, i32 2
  %388 = insertelement <4 x float> %387, float %340, i32 2						     |	  %389 = insertelement <4 x float> %388, float %284, i32 3
  %389 = insertelement <4 x float> %388, float 0.000000e+00, i32 3					     |	  call void @llvm.R600.store.swizzle(<4 x float> %389, i32 1, i32 2)
  call void @llvm.R600.store.swizzle(<4 x float> %389, i32 2, i32 2)					     |	  %390 = insertelement <4 x float> undef, float %336, i32 0
  %390 = insertelement <4 x float> undef, float %365, i32 0						     |	  %391 = insertelement <4 x float> %390, float %340, i32 1
  %391 = insertelement <4 x float> %390, float %369, i32 1						     |	  %392 = insertelement <4 x float> %391, float %344, i32 2
  %392 = insertelement <4 x float> %391, float %373, i32 2						     <
  %393 = insertelement <4 x float> %392, float 0.000000e+00, i32 3						  %393 = insertelement <4 x float> %392, float 0.000000e+00, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %393, i32 3, i32 2)					     |	  call void @llvm.R600.store.swizzle(<4 x float> %393, i32 2, i32 2)
													     >	  %394 = insertelement <4 x float> undef, float %369, i32 0
													     >	  %395 = insertelement <4 x float> %394, float %373, i32 1
													     >	  %396 = insertelement <4 x float> %395, float %377, i32 2
													     >	  %397 = insertelement <4 x float> %396, float 0.000000e+00, i32 3
													     >	  call void @llvm.R600.store.swizzle(<4 x float> %397, i32 3, i32 2)
  ret void													  ret void
}														}

; Function Attrs: readnone											; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1								     <
													     <
; Function Attrs: readnone										     <
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1							declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1

; Function Attrs: readnone											; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #1									declare float @llvm.AMDGPU.rsq(float) #1

declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)							declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

attributes #0 = { "ShaderType"="1" }										attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }											attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness					     <
Function Live Ins: %T4_W, %T4_Z, %T4_Y, %T4_X, %T3_W, %T3_Z, %T3_Y, %T3_X, %T2_W, %T2_Y, %T2_X, %T1_W, %T1_Z <
													     <
BB#0: derived from LLVM BB %main_body									     <
    Live Ins: %T4_W %T4_Z %T4_Y %T4_X %T3_W %T3_Z %T3_Y %T3_X %T2_W %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X	     <
	CF_CALL_FS_R600											     <
	CF_ALU 8, 0, 0, 2, 0, 0, 0, 111, 1								     <
	R600_ExportSwz %T11_XYZW<kill>, 1, 60, 0, 1, 2, 3, 40, 0					     <
	R600_ExportSwz %T0_XYZW<kill>, 2, 0, 0, 1, 2, 3, 39, 0						     <
	R600_ExportSwz %T12_XYZW<kill>, 2, 1, 0, 1, 2, 3, 39, 0						     <
	R600_ExportSwz %T9_XYZW<kill>, 2, 2, 0, 1, 2, 4, 39, 0						     <
	R600_ExportSwz %T14_XYZW<kill>, 2, 3, 0, 1, 2, 4, 40, 1						     <
	CF_END_R600											     <
	ALU_CLAUSE 8											     <
	%T0_W<def> = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-def>	     <
	%T2_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X<kill>, 0, 0, 0, -1, %KC0_139_X, 0, 0, 0, 2092, 1, pred <
	%T0_X<def> = MULADD_IEEE_r600 0, 0, %KC0_139_Z, 0, 0, 2094, %PV_W, 0, 0, -1, %PS<kill>, 0, 0, -1, 0, <
	%T2_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y<kill>, 0, 0, 0, -1, %KC0_139_Y, 0, 0, 0, 2093, 1, pred <
	%T0_Y<def> = MULADD_IEEE_r600 0, 0, %KC0_139_W, 0, 0, 2095, %T0_W, 0, 0, -1, %PV_W<kill>, 0, 0, -1,  <
	%T0_Z<def> = MOV 1, 0, 0, 0, %T3_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-def>	     <
	%T2_X<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_136_X, 0, 0, 0, 2080, %T3_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Y<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Y, 0, 0, 0, 2081, %T3_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Z, 0, 0, 0, 2082, %T3_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O <
	%T2_X<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %T3_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Y<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %T3_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %T3_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O <
	%T2_X<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %T3_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Y<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %T3_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Z<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %T3_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O <
	%T3_X<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_136_X, 0, 0, 0, 2080, %T4_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_Y<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Y, 0, 0, 0, 2081, %T4_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Z, 0, 0, 0, 2082, %T4_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O <
	%T3_X<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %T4_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_Y<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %T4_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %T4_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O <
	%T3_X<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %T4_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_Y<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %T4_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_Z<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %T4_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T3_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O <
	%T2_X<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %T3_X, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_O <
	%T2_Y<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %T3_Y, 0, 0, 0, -1, %T3_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_O <
	%T2_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %T3_Z, 0, 0, 0, -1, %T3_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_O <
	%T2_W<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O <
	%T2_W<def> = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %PV_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0,  <
	%T3_X<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_O <
	%T3_Y<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T2_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_O <
	%T3_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T2_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_O <
	%T3_W<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %ZERO, 0, 0, 0, -1, %ZERO, 0, 0, 0, -1, 1, pred:%PRED_SEL_O <
	%T3_W<def> = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %PV_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0,  <
	%T5_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Z<kill>, 0, 0, 0, -1, %PS, 0, 0, 0, -1, 0, pred:%PRED_SE <
	%T6_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_Y<kill>, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_ <
	%T3_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X<kill>, 0, 0, 0, -1, %T3_W, 0, 0, 0, -1, 0, pred:%PRED_ <
	%T2_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_W, 0, 0, 0, -1, %PS, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, <
	%T7_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_Z<kill>, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 0, pred:%PRED_ <
	%T3_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y<kill>, 0, 0, 0, -1, %T3_W<kill>, 0, 0, 0, -1, 1, pred: <
	%T2_Z<def> = MULADD_IEEE_r600 0, 0, %PS, 0, 0, -1, %PV_W, 0, 0, -1, %PV_Z<kill>, 1, 0, -1, 0, pred:% <
	%T8_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_Y, 0, 0, 0, -1, %PV_W, 0, 0, 0, -1, 0, pred:%PRED_SEL_OF <
	%T2_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_X<kill>, 0, 0, 0, -1, %T2_W<kill>, 0, 0, 0, -1, 1, pred: <
	%T3_Z<def> = MULADD_IEEE_r600 0, 0, %T5_W, 0, 0, -1, %PS, 0, 0, -1, %PV_W<kill>, 1, 0, -1, 0, pred:% <
	%T8_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_Z<kill>, 0, 0, 0, -1, %T4_W, 0, 0, 0, -1, 0, pred:%PRED_ <
	%T9_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T3_W, 0, 0, 0, -1, %PS, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, <
	%T2_Z<def> = MULADD_IEEE_r600 0, 0, %T3_Y, 0, 0, -1, %T6_W, 0, 0, -1, %PS<kill>, 1, 0, -1, 0, pred:% <
	%T9_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, %PV_W, 0, 0, 0, -1, 0, pred:%PRED <
	%T10_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_Z<kill>, 0, 0, 0, -1, %T4_W, 0, 0, 0, -1, 1, pred:%PRED <
	%T9_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_X, 0, 0, 2068, %PS, 0, 0, -1, %PV_W<kill>, 0, 0, -1, 0, <
	%T4_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %PV_Z<kill>, 0, 0, 0, -1, %T4_W<kill>, 0, 0, 0, -1, 1, pred: <
	%T9_X<def> = MULADD_IEEE_r600 0, 0, %KC0_134_X, 0, 0, 2072, %PS, 0, 0, -1, %PV_W<kill>, 0, 0, -1, 0, <
	%T11_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, %T2_W, 0, 0, 0, -1, 1, pred:%PRE <
	%T2_X<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_136_X, 0, 0, 0, 2080, %T1_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Y<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Y, 0, 0, 0, 2081, %T1_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_Z, 0, 0, 0, 2082, %T1_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_136_W, 0, 0, 0, 2083, %T1_W, 0, 0, 0, -1, 1, pred:%PRE <
	%T2_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_128_X, 0, 0, 0, 2048, %PV_X, 0, 0, 0, -1, 0, pred:%PRED <
	%T11_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_X, 0, 0, 2068, %T6_W, 0, 0, -1, %T11_W<kill>, 0, 0, -1 <
	%T12_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_W, 0, 0, 0, 2067, %T2_W, 0, 0, 0, -1, 1, pred:%PRE <
	%T12_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_W, 0, 0, 2071, %T6_W, 0, 0, -1, %PV_W<kill>, 0, 0, -1, <
	%T2_X<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %T1_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Y<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %T1_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %T1_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T2_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_137_W, 0, 0, 0, 2087, %T1_W, 0, 0, 0, -1, 1, pred:%PRE <
	%T12_X<def> = MULADD_IEEE_r600 0, 0, %KC0_134_X, 0, 0, 2072, %T7_W, 0, 0, -1, %T11_W<kill>, 0, 0, -1 <
	%T11_W<def> = MULADD_IEEE_r600 0, 0, %KC0_129_X, 0, 0, 2052, %PV_X, 0, 0, -1, %T2_Z<kill>, 0, 0, -1, <
	%T13_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, %T2_W, 0, 0, 0, -1, 0, pred:%PRE <
	%T14_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_128_Y, 0, 0, 0, 2049, %T2_X, 0, 0, 0, -1, 1, pred:%PRE <
	%T1_X<def> = DOT4_r600 0, 0, 1, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %T1_X, 0, 0, 0, -1, 0, pred:%PRE <
	%T1_Y<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %T1_Y, 0, 0, 0, -1, 0, pred:%PRE <
	%T1_Z<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %T1_Z, 0, 0, 0, -1, 0, pred:%PRE <
	%T1_W<def> = DOT4_r600 0, 0, 0, 0, 0, 0, %KC0_138_W, 0, 0, 0, 2091, %T1_W, 0, 0, 0, -1, 1, pred:%PRE <
	%T1_Z<def> = MULADD_IEEE_r600 0, 0, %KC0_129_Y, 0, 0, 2053, %T2_Y, 0, 0, -1, %T14_W<kill>, 0, 0, -1, <
	%T1_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_Y, 0, 0, 2069, %T6_W, 0, 0, -1, %T13_W<kill>, 0, 0, -1, <
	%T11_W<def> = MULADD_IEEE_r600 0, 0, %KC0_130_X, 0, 0, 2056, %T1_X, 0, 0, -1, %T11_W<kill>, 0, 0, -1 <
	%T11_X<def> = ADD 0, 0, 1, 0, 0, 0, %PV_W<kill>, 0, 0, 0, -1, %KC0_131_X, 0, 0, 0, 2060, 0, pred:%PR <
	%T12_Y<def> = MULADD_IEEE_r600 0, 0, %KC0_134_Y, 0, 0, 2073, %T7_W, 0, 0, -1, %T1_W<kill>, 0, 0, -1, <
	%T1_W<def> = MULADD_IEEE_r600 0, 0, %KC0_130_Y, 0, 0, 2057, %T1_X, 0, 0, -1, %T1_Z<kill>, 0, 0, -1,  <
	%T11_Y<def> = ADD 0, 0, 1, 0, 0, 0, %PV_W<kill>, 0, 0, 0, -1, %KC0_131_Y, 0, 0, 0, 2061, 0, pred:%PR <
	%T1_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, %T3_Y, 0, 0, 0, -1, 0, pred:%PRED <
	%T13_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, %T8_W, 0, 0, 0, -1, 1, pred:%PRE <
	%T1_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, %T3_Y, 0, 0, 0, -1, 0, pred:%PRED <
	%T13_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_Y, 0, 0, 2069, %T10_W, 0, 0, -1, %PS<kill>, 0, 0, -1,  <
	%T1_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_X, 0, 0, 2068, %T3_W, 0, 0, -1, %PV_W<kill>, 0, 0, -1,  <
	%T14_X<def> = MULADD_IEEE_r600 0, 0, %KC0_134_X, 0, 0, 2072, %T5_W, 0, 0, -1, %PS<kill>, 0, 0, -1, 0 <
	%T9_Y<def> = MULADD_IEEE_r600 0, 0, %KC0_134_Y, 0, 0, 2073, %T4_W, 0, 0, -1, %PV_W<kill>, 0, 0, -1,  <
	%T1_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_Y, 0, 0, 2069, %T3_W, 0, 0, -1, %PV_Z<kill>, 0, 0, -1,  <
	%T14_Y<def> = MULADD_IEEE_r600 0, 0, %KC0_134_Y, 0, 0, 2073, %T5_W, 0, 0, -1, %PV_W<kill>, 0, 0, -1, <
	%T1_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, %T3_Y<kill>, 0, 0, 0, -1, 1, pred <
	%T13_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_128_Z, 0, 0, 0, 2050, %T2_X, 0, 0, 0, -1, 1, pred:%PRE <
	%T1_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, %T2_W<kill>, 0, 0, 0, -1, 0, pred <
	%T1_Z<def> = MULADD_IEEE_r600 0, 0, %KC0_129_Z, 0, 0, 2054, %T2_Y, 0, 0, -1, %PV_W<kill>, 0, 0, -1,  <
	%T2_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, %T8_W<kill>, 0, 0, 0, -1, 1, pred <
	%T1_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_Z, 0, 0, 2070, %T3_W<kill>, 0, 0, -1, %T1_W<kill>, 0, 0 <
	%T14_Z<def> = MULADD_IEEE_r600 0, 0, %KC0_134_Z, 0, 0, 2074, %T5_W<kill>, 0, 0, -1, %PV_W<kill>, 0,  <
	%T1_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_128_W, 0, 0, 0, 2051, %T2_X<kill>, 0, 0, 0, -1, 1, pred <
	%T2_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_Z, 0, 0, 2070, %T10_W<kill>, 0, 0, -1, %T2_W<kill>, 0,  <
	%T9_Z<def> = MULADD_IEEE_r600 0, 0, %KC0_134_Z, 0, 0, 2074, %T4_W<kill>, 0, 0, -1, %PV_W<kill>, 0, 0 <
	%T1_W<def> = MULADD_IEEE_r600 0, 0, %KC0_129_W, 0, 0, 2055, %T2_Y<kill>, 0, 0, -1, %T1_W<kill>, 0, 0 <
	%T2_W<def> = MULADD_IEEE_r600 0, 0, %KC0_130_Z, 0, 0, 2058, %T1_X, 0, 0, -1, %T1_Z<kill>, 0, 0, -1,  <
	%T11_Z<def> = ADD 0, 0, 1, 0, 0, 0, %PV_W<kill>, 0, 0, 0, -1, %KC0_131_Z, 0, 0, 0, 2062, 0, pred:%PR <
	%T1_W<def> = MULADD_IEEE_r600 0, 0, %KC0_130_W, 0, 0, 2059, %T1_X<kill>, 0, 0, -1, %T1_W<kill>, 0, 0 <
	%T2_W<def> = MULADD_IEEE_r600 0, 0, %KC0_133_Z, 0, 0, 2070, %T6_W<kill>, 0, 0, -1, %T1_Y<kill>, 0, 0 <
	%T12_Z<def> = MULADD_IEEE_r600 0, 0, %KC0_134_Z, 0, 0, 2074, %T7_W<kill>, 0, 0, -1, %PV_W<kill>, 0,  <
	%T11_W<def> = ADD 0, 0, 1, 0, 0, 0, %T1_W<kill>, 0, 0, 0, -1, %KC0_131_W, 0, 0, 0, 2063, 1, pred:%PR <
													     <
# End machine code for function main.									     <
													     <
													     <
Shader Disassembly:											     <
													     <
	CALL_FS                                                                  ; 00000000 89800000	     <
	ALU 111, @8, KC0[CB0:0-32], KC1[]                                        ; 80000008 A1BC0000	     <
	EXPORT T11.XYZW                                                          ; C005A03C 94000688	     <
	EXPORT T0.XYZW                                                           ; C0004000 93800688	     <
	EXPORT T12.XYZW                                                          ; C0064001 93800688	     <
	EXPORT T9.XYZ0                                                           ; C004C002 93800888	     <
	EXPORT T14.XYZ0                                                          ; C0074003 94200888	     <
	CF_END                                                                   ; 00000000 80200000	     <
	ALU clause starting at 8:                                                ; 			     <
	  MOV                     T0.W, T2.W,                                    ; 00000C02 60001910	     <
	  MUL_IEEE              * T2.W, T2.X, KC0[11].X,                         ; 80116002 60400210	     <
	  MULADD_IEEE             T0.X, KC0[11].Z, PV.W, PS,                     ; 019FC88B 000280FF	     <
	  MUL_IEEE              * T2.W, T2.Y, KC0[11].Y,                         ; 80916402 60400210	     <
	  MULADD_IEEE             T0.Y, KC0[11].W, T0.W, PV.W,                   ; 01800C8B 20028CFE	     <
	  MOV                   * T0.Z, T3.W,                                    ; 80000C03 40001910	     <
	  DOT4                    T2.X, KC0[8].X, T3.X,                          ; 00006088 00405010	     <
	  DOT4                    T2.Y (MASKED), KC0[8].Y, T3.Y,                 ; 00806488 20405000	     <
	  DOT4                    T2.Z (MASKED), KC0[8].Z, T3.Z,                 ; 01006888 40405000	     <
	  DOT4                  * T2.W (MASKED), 0.0, 0.0,                       ; 801F00F8 60405000	     <
	  DOT4                    T2.X (MASKED), KC0[9].X, T3.X,                 ; 00006089 00405000	     <
	  DOT4                    T2.Y, KC0[9].Y, T3.Y,                          ; 00806489 20405010	     <
	  DOT4                    T2.Z (MASKED), KC0[9].Z, T3.Z,                 ; 01006889 40405000	     <
	  DOT4                  * T2.W (MASKED), 0.0, 0.0,                       ; 801F00F8 60405000	     <
	  DOT4                    T2.X (MASKED), KC0[10].X, T3.X,                ; 0000608A 00405000	     <
	  DOT4                    T2.Y (MASKED), KC0[10].Y, T3.Y,                ; 0080648A 20405000	     <
	  DOT4                    T2.Z, KC0[10].Z, T3.Z,                         ; 0100688A 40405010	     <
	  DOT4                  * T2.W (MASKED), 0.0, 0.0,                       ; 801F00F8 60405000	     <
	  DOT4                    T3.X, KC0[8].X, T4.X,                          ; 00008088 00605010	     <
	  DOT4                    T3.Y (MASKED), KC0[8].Y, T4.Y,                 ; 00808488 20605000	     <
	  DOT4                    T3.Z (MASKED), KC0[8].Z, T4.Z,                 ; 01008888 40605000	     <
	  DOT4                  * T3.W (MASKED), 0.0, 0.0,                       ; 801F00F8 60605000	     <
	  DOT4                    T3.X (MASKED), KC0[9].X, T4.X,                 ; 00008089 00605000	     <
	  DOT4                    T3.Y, KC0[9].Y, T4.Y,                          ; 00808489 20605010	     <
	  DOT4                    T3.Z (MASKED), KC0[9].Z, T4.Z,                 ; 01008889 40605000	     <
	  DOT4                  * T3.W (MASKED), 0.0, 0.0,                       ; 801F00F8 60605000	     <
	  DOT4                    T3.X (MASKED), KC0[10].X, T4.X,                ; 0000808A 00605000	     <
	  DOT4                    T3.Y (MASKED), KC0[10].Y, T4.Y,                ; 0080848A 20605000	     <
	  DOT4                    T3.Z, KC0[10].Z, T4.Z,                         ; 0100888A 40605010	     <
	  DOT4                  * T3.W (MASKED), 0.0, 0.0,                       ; 801F00F8 60605000	     <
	  DOT4                    T2.X (MASKED), T3.X, T3.X,                     ; 00006003 00405000	     <
	  DOT4                    T2.Y (MASKED), T3.Y, T3.Y,                     ; 00806403 20405000	     <
	  DOT4                    T2.Z (MASKED), T3.Z, T3.Z,                     ; 01006803 40405000	     <
	  DOT4                  * T2.W, 0.0, 0.0,                                ; 801F00F8 60405010	     <
	  RECIPSQRT_CLAMPED     * T2.W, PV.X,                                    ; 800000FE 60406710	     <
	  DOT4                    T3.X (MASKED), T2.X, T2.X,                     ; 00004002 00605000	     <
	  DOT4                    T3.Y (MASKED), T2.Y, T2.Y,                     ; 00804402 20605000	     <
	  DOT4                    T3.Z (MASKED), T2.Z, T2.Z,                     ; 01004802 40605000	     <
	  DOT4                  * T3.W, 0.0, 0.0,                                ; 801F00F8 60605010	     <
	  RECIPSQRT_CLAMPED     * T3.W, PV.X,                                    ; 800000FE 60606710	     <
	  MUL_IEEE                T5.W, T2.Z, PS,                                ; 001FE802 60A00210	     <
	  MUL_IEEE              * T6.W, T3.Y, T2.W,                              ; 81804403 60C00210	     <
	  MUL_IEEE                T3.Y, T2.X, T3.W,                              ; 01806002 20600210	     <
	  MUL_IEEE                T2.Z, PV.W, PS,                                ; 001FECFE 40400210	     <
	  MUL_IEEE                T7.W, T3.Z, T2.W,  BS:VEC_021/SCL_122          ; 01804803 60E40210	     <
	  MUL_IEEE              * T3.W, T2.Y, T3.W,                              ; 81806402 60600210	     <
	  MULADD_IEEE             T2.Z, PS, PV.W, -PV.Z,                         ; 019FC0FF 404298FE	     <
	  MUL_IEEE                T8.W, PV.Y, PV.W,                              ; 019FC4FE 61000210	     <
	  MUL_IEEE              * T2.W, T3.X, T2.W,                              ; 81804003 60400210	     <
	  MULADD_IEEE             T3.Z, T5.W, PS, -PV.W,                         ; 001FEC05 40629CFE	     <
	  MUL_IEEE                T8.W, PV.Z, T4.W,                              ; 018088FE 61000210	     <
	  MUL_IEEE              * T9.W, T3.W, PS,                                ; 801FEC03 61200210	     <
	  MULADD_IEEE             T2.Z, T3.Y, T6.W, -PS, BS:VEC_021/SCL_122      ; 0180C403 404690FF	     <
	  MUL_IEEE                T9.W, KC0[4].X, PV.W,                          ; 019FC084 61200210	     <
	  MUL_IEEE              * T10.W, PV.Z, T4.W,                             ; 818088FE 61400210	     <
	  MULADD_IEEE             T9.W, KC0[5].X, PS, PV.W,                      ; 001FE085 61228CFE	     <
	  MUL_IEEE              * T4.W, PV.Z, T4.W,                              ; 818088FE 60800210	     <
	  MULADD_IEEE             T9.X, KC0[6].X, PS, PV.W,                      ; 001FE086 01228CFE	     <
	  MUL_IEEE              * T11.W, KC0[4].X, T2.W,                         ; 81804084 61600210	     <
	  DOT4                    T2.X, KC0[8].X, T1.X,                          ; 00002088 00405010	     <
	  DOT4                    T2.Y (MASKED), KC0[8].Y, T1.Y,                 ; 00802488 20405000	     <
	  DOT4                    T2.Z (MASKED), KC0[8].Z, T1.Z,                 ; 01002888 40405000	     <
	  DOT4                  * T2.W (MASKED), KC0[8].W, T1.W,                 ; 81802C88 60405000	     <
	  MUL_IEEE                T2.Z, KC0[0].X, PV.X,                          ; 001FC080 40400210	     <
	  MULADD_IEEE           * T11.W, KC0[5].X, T6.W, T11.W,                  ; 8180C085 61628C0B	     <
	  MUL_IEEE              * T12.W, KC0[4].W, T2.W,                         ; 81804C84 61800210	     <
	  MULADD_IEEE           * T12.W, KC0[5].W, T6.W, PV.W,                   ; 8180CC85 61828CFE	     <
	  DOT4                    T2.X (MASKED), KC0[9].X, T1.X,                 ; 00002089 00405000	     <
	  DOT4                    T2.Y, KC0[9].Y, T1.Y,                          ; 00802489 20405010	     <
	  DOT4                    T2.Z (MASKED), KC0[9].Z, T1.Z,                 ; 01002889 40405000	     <
	  DOT4                  * T2.W (MASKED), KC0[9].W, T1.W,                 ; 81802C89 60405000	     <
	  MULADD_IEEE             T12.X, KC0[6].X, T7.W, T11.W,                  ; 0180E086 01828C0B	     <
	  MULADD_IEEE           * T11.W, KC0[1].X, PV.X, T2.Z,                   ; 801FC081 61628802	     <
	  MUL_IEEE                T13.W, KC0[4].Y, T2.W,                         ; 01804484 61A00210	     <
	  MUL_IEEE              * T14.W, KC0[0].Y, T2.X,                         ; 80004480 61C00210	     <
	  DOT4                    T1.X, KC0[10].X, T1.X,                         ; 0000208A 00205010	     <
	  DOT4                    T1.Y (MASKED), KC0[10].Y, T1.Y,                ; 0080248A 20205000	     <
	  DOT4                    T1.Z (MASKED), KC0[10].Z, T1.Z,                ; 0100288A 40205000	     <
	  DOT4                  * T1.W (MASKED), KC0[10].W, T1.W,                ; 81802C8A 60205000	     <
	  MULADD_IEEE             T1.Z, KC0[1].Y, T2.Y, T14.W,                   ; 00804481 40228C0E	     <
	  MULADD_IEEE           * T1.W, KC0[5].Y, T6.W, T13.W, BS:VEC_201        ; 8180C485 60328C0D	     <
	  MULADD_IEEE           * T11.W, KC0[2].X, T1.X, T11.W,                  ; 80002082 61628C0B	     <
	  ADD                     T11.X, PV.W, KC0[3].X,                         ; 00106CFE 01600010	     <
	  MULADD_IEEE           * T12.Y, KC0[6].Y, T7.W, T1.W,                   ; 8180E486 21828C01	     <
	  MULADD_IEEE           * T1.W, KC0[2].Y, T1.X, T1.Z,                    ; 80002482 60228801	     <
	  ADD                     T11.Y, PV.W, KC0[3].Y,                         ; 00906CFE 21600010	     <
	  MUL_IEEE                T1.W, KC0[4].X, T3.Y,                          ; 00806084 60200210	     <
	  MUL_IEEE              * T13.W, KC0[4].Y, T8.W,                         ; 81810484 61A00210	     <
	  MUL_IEEE                T1.Z, KC0[4].Y, T3.Y,                          ; 00806484 40200210	     <
	  MULADD_IEEE             T13.W, KC0[5].Y, T10.W, PS,                    ; 01814485 61A280FF	     <
	  MULADD_IEEE           * T1.W, KC0[5].X, T3.W, PV.W, BS:VEC_021/SCL_122 ; 81806085 60268CFE	     <
	  MULADD_IEEE             T14.X, KC0[6].X, T5.W, PS,                     ; 0180A086 01C280FF	     <
	  MULADD_IEEE             T9.Y, KC0[6].Y, T4.W, PV.W, BS:VEC_021/SCL_122 ; 01808486 21268CFE	     <
	  MULADD_IEEE           * T1.W, KC0[5].Y, T3.W, PV.Z, BS:VEC_102/SCL_221 ; 81806485 602E88FE	     <
	  MULADD_IEEE             T14.Y, KC0[6].Y, T5.W, PV.W,                   ; 0180A486 21C28CFE	     <
	  MUL_IEEE              * T1.W, KC0[4].Z, T3.Y,                          ; 80806884 60200210	     <
	  MUL_IEEE              * T13.W, KC0[0].Z, T2.X,                         ; 80004880 61A00210	     <
	  MUL_IEEE                T1.Y, KC0[4].Z, T2.W,                          ; 01804884 20200210	     <
	  MULADD_IEEE             T1.Z, KC0[1].Z, T2.Y, PV.W,                    ; 00804881 40228CFE	     <
	  MUL_IEEE              * T2.W, KC0[4].Z, T8.W,  BS:VEC_021/SCL_122      ; 81810884 60440210	     <
	  MULADD_IEEE           * T1.W, KC0[5].Z, T3.W, T1.W,                    ; 81806885 60228C01	     <
	  MULADD_IEEE             T14.Z, KC0[6].Z, T5.W, PV.W,                   ; 0180A886 41C28CFE	     <
	  MUL_IEEE              * T1.W, KC0[0].W, T2.X,                          ; 80004C80 60200210	     <
	  MULADD_IEEE           * T2.W, KC0[5].Z, T10.W, T2.W,                   ; 81814885 60428C02	     <
	  MULADD_IEEE             T9.Z, KC0[6].Z, T4.W, PV.W,                    ; 01808886 41228CFE	     <
	  MULADD_IEEE           * T1.W, KC0[1].W, T2.Y, T1.W,                    ; 80804C81 60228C01	     <
	  MULADD_IEEE           * T2.W, KC0[2].Z, T1.X, T1.Z,                    ; 80002882 60428801	     <
	  ADD                     T11.Z, PV.W, KC0[3].Z,                         ; 01106CFE 41600010	     <
	  MULADD_IEEE           * T1.W, KC0[2].W, T1.X, T1.W,                    ; 80002C82 60228C01	     <
	  MULADD_IEEE           * T2.W, KC0[5].Z, T6.W, T1.Y,                    ; 8180C885 60428401	     <
	  MULADD_IEEE             T12.Z, KC0[6].Z, T7.W, PV.W,                   ; 0180E886 41828CFE	     <
	  ADD                   * T11.W, T1.W, KC0[3].W,                         ; 81906C01 61600010	     <
													     <