--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }
   {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }

===== SHADER #1 ========================================= FETCH/RV670/R600 =====
===== 12 dw ===== 3 gprs ===== 0 stack =========================================
0000  00000002 81000400 VTX 2 @4
 0004  7c00a000 88cd1001 00080000 VFETCH              R1.xyzw, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1)
 0008  7c00a000 88cd1002 00080010 VFETCH              R2.xyzw, R0.x + 16b ,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }
   {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_SINT, }

===== SHADER #2 ========================================= FETCH/RV670/R600 =====
===== 12 dw ===== 3 gprs ===== 0 stack =========================================
0000  00000002 81000400 VTX 2 @4
 0004  7c00a000 88cd1001 00080000 VFETCH              R1.xyzw, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1)
 0008  7c00a000 d88d1002 00080010 VFETCH              R2.xyzw, R0.x + 16b ,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:1 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }
   {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_UINT, }

===== SHADER #3 ========================================= FETCH/RV670/R600 =====
===== 12 dw ===== 3 gprs ===== 0 stack =========================================
0000  00000002 81000400 VTX 2 @4
 0004  7c00a000 88cd1001 00080000 VFETCH              R1.xyzw, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1)
 0008  7c00a000 988d1002 00080010 VFETCH              R2.xyzw, R0.x + 16b ,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32_UINT, }

===== SHADER #4 ========================================= FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 93564001 00080000 VFETCH              R1.x001, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:13 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32_UINT, }

===== SHADER #5 ========================================= FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 97561001 00080000 VFETCH              R1.xy01, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:29 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_UINT, }

===== SHADER #6 ========================================= FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 9bd51001 00080000 VFETCH              R1.xyz1, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:47 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_UINT, }

===== SHADER #7 ========================================= FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 988d1001 00080000 VFETCH              R1.xyzw, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 4)
  %1 = call float @llvm.R600.load.input(i32 5)
  %2 = call float @llvm.R600.load.input(i32 6)
  %3 = call float @llvm.R600.load.input(i32 7)
  %4 = call float @llvm.R600.load.input(i32 8)
  %5 = call float @llvm.R600.load.input(i32 9)
  %6 = call float @llvm.R600.load.input(i32 10)
  %7 = call float @llvm.R600.load.input(i32 11)
  %8 = insertelement <4 x float> undef, float %0, i32 0
  %9 = insertelement <4 x float> %8, float %1, i32 1
  %10 = insertelement <4 x float> %9, float %2, i32 2
  %11 = insertelement <4 x float> %10, float %3, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1)
  %12 = insertelement <4 x float> undef, float %4, i32 0
  %13 = insertelement <4 x float> %12, float %5, i32 1
  %14 = insertelement <4 x float> %13, float %6, i32 2
  %15 = insertelement <4 x float> %14, float %7, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %15, i32 0, i32 2)
  ret void
}

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T2_W in %vreg0, %T2_Z in %vreg1, %T2_Y in %vreg2, %T2_X in %vreg3, %T1_W in %vreg4, %T1_Z in %vreg5, %T1_Y in %vreg6, %T1_X in %vreg7

BB#0: derived from LLVM BB %main_body
    Live Ins: %T2_W %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X
	CF_CALL_FS_R600
	%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-def>
	%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T2_X<def> = KILL %T2_X, %T2_XYZW<imp-def>
	%T2_Y<def> = KILL %T2_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T2_Z<def> = KILL %T2_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T2_W<def> = KILL %T2_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	R600_ExportSwz %T1_XYZW<kill>, 1, 60, 0, 1, 2, 3, 40, 0
	R600_ExportSwz %T2_XYZW<kill>, 2, 0, 0, 1, 2, 3, 40, 1
	CF_END_R600

# End machine code for function main.


===== SHADER #8 ============================================ VS/RV670/R600 =====
===== 8 dw ===== 3 gprs ===== 1 stack ==========================================
0000  00000000 89800000 CALL_FS @0
0002  c000a03c 94400688 EXPORT_DONE        POS   60   R1.xyzw  VPM
0004  c0014000 94600688 EXPORT_DONE        PARAM 0    R2.xyzw  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
VERT
DCL IN[0]
DCL OUT[0], POSITION
  0: MOV OUT[0], IN[0]
  1: END
STREAMOUT
  0: MEM_STREAM0_BUF0[0..0] <- OUT[0].x
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 4)
  %1 = call float @llvm.R600.load.input(i32 5)
  %2 = call float @llvm.R600.load.input(i32 6)
  %3 = call float @llvm.R600.load.input(i32 7)
  %4 = insertelement <4 x float> undef, float %0, i32 0
  %5 = insertelement <4 x float> %4, float %1, i32 1
  %6 = insertelement <4 x float> %5, float %2, i32 2
  %7 = insertelement <4 x float> %6, float %3, i32 3
  call void @llvm.R600.store.stream.output(<4 x float> %7, i32 0, i32 0, i32 1)
  %8 = insertelement <4 x float> undef, float %0, i32 0
  %9 = insertelement <4 x float> %8, float %1, i32 1
  %10 = insertelement <4 x float> %9, float %2, i32 2
  %11 = insertelement <4 x float> %10, float %3, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1)
  call void @llvm.R600.store.dummy(i32 2)
  ret void
}

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)

declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

declare void @llvm.R600.store.dummy(i32)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3

BB#0: derived from LLVM BB %main_body
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
	CF_CALL_FS_R600
	%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-def>
	%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	R600_ExportBuf %T1_XYZW, 0, 0, 4095, 1, 32, 0
	R600_ExportSwz %T1_XYZW<kill>, 1, 60, 0, 1, 2, 3, 40, 0
	R600_ExportSwz %T0_XYZW<undef>, 2, 0, 7, 7, 7, 7, 40, 1
	CF_END_R600
	PAD

# End machine code for function main.


===== SHADER #9 ============================================ VS/RV670/R600 =====
===== 12 dw ===== 2 gprs ===== 1 stack =========================================
0000  00000000 89800000 CALL_FS @0
0002  00008000 90401fff MEM_STREAM0        WRITE 0    R1.x___  ES:0  VPM
0004  c000a03c 94400688 EXPORT_DONE        POS   60   R1.xyzw  VPM
0006  c0004000 94600fff EXPORT_DONE        PARAM 0    R0.____  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
FRAG
DCL IN[0], GENERIC[0], CONSTANT
  0: END
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 0)
  %1 = call float @llvm.R600.load.input(i32 1)
  %2 = call float @llvm.R600.load.input(i32 2)
  %3 = call float @llvm.R600.load.input(i32 3)
  call void @llvm.R600.store.dummy(i32 0)
  ret void
}

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

declare void @llvm.R600.store.dummy(i32)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness

BB#0: derived from LLVM BB %main_body
	R600_ExportSwz %T0_XYZW<undef>, 0, 0, 7, 7, 7, 7, 40, 1
	CF_END_R600

# End machine code for function main.


===== SHADER #10 =========================================== PS/RV670/R600 =====
===== 4 dw ===== 1 gprs ===== 1 stack ==========================================
0000  c0000000 94600fff EXPORT_DONE        PIXEL 0    R0.____  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }
   {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }

===== SHADER #11 ======================================== FETCH/RV670/R600 =====
===== 12 dw ===== 3 gprs ===== 0 stack =========================================
0000  00000002 81000400 VTX 2 @4
 0004  7c00a000 88cd1001 00080000 VFETCH              R1.xyzw, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1)
 0008  7c00a000 88cd1002 00080010 VFETCH              R2.xyzw, R0.x + 16b ,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }
   {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_SINT, }

===== SHADER #12 ======================================== FETCH/RV670/R600 =====
===== 12 dw ===== 3 gprs ===== 0 stack =========================================
0000  00000002 81000400 VTX 2 @4
 0004  7c00a000 88cd1001 00080000 VFETCH              R1.xyzw, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1)
 0008  7c00a000 d88d1002 00080010 VFETCH              R2.xyzw, R0.x + 16b ,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:1 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }
   {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_UINT, }

===== SHADER #13 ======================================== FETCH/RV670/R600 =====
===== 12 dw ===== 3 gprs ===== 0 stack =========================================
0000  00000002 81000400 VTX 2 @4
 0004  7c00a000 88cd1001 00080000 VFETCH              R1.xyzw, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1)
 0008  7c00a000 988d1002 00080010 VFETCH              R2.xyzw, R0.x + 16b ,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32_UINT, }

===== SHADER #14 ======================================== FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 93564001 00080000 VFETCH              R1.x001, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:13 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32_UINT, }

===== SHADER #15 ======================================== FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 97561001 00080000 VFETCH              R1.xy01, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:29 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_UINT, }

===== SHADER #16 ======================================== FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 9bd51001 00080000 VFETCH              R1.xyz1, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:47 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_UINT, }

===== SHADER #17 ======================================== FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 988d1001 00080000 VFETCH              R1.xyzw, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 4)
  %1 = call float @llvm.R600.load.input(i32 5)
  %2 = call float @llvm.R600.load.input(i32 6)
  %3 = call float @llvm.R600.load.input(i32 7)
  %4 = call float @llvm.R600.load.input(i32 8)
  %5 = call float @llvm.R600.load.input(i32 9)
  %6 = call float @llvm.R600.load.input(i32 10)
  %7 = call float @llvm.R600.load.input(i32 11)
  %8 = insertelement <4 x float> undef, float %0, i32 0
  %9 = insertelement <4 x float> %8, float %1, i32 1
  %10 = insertelement <4 x float> %9, float %2, i32 2
  %11 = insertelement <4 x float> %10, float %3, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1)
  %12 = insertelement <4 x float> undef, float %4, i32 0
  %13 = insertelement <4 x float> %12, float %5, i32 1
  %14 = insertelement <4 x float> %13, float %6, i32 2
  %15 = insertelement <4 x float> %14, float %7, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %15, i32 0, i32 2)
  ret void
}

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T2_W in %vreg0, %T2_Z in %vreg1, %T2_Y in %vreg2, %T2_X in %vreg3, %T1_W in %vreg4, %T1_Z in %vreg5, %T1_Y in %vreg6, %T1_X in %vreg7

BB#0: derived from LLVM BB %main_body
    Live Ins: %T2_W %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X
	CF_CALL_FS_R600
	%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-def>
	%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T2_X<def> = KILL %T2_X, %T2_XYZW<imp-def>
	%T2_Y<def> = KILL %T2_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T2_Z<def> = KILL %T2_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T2_W<def> = KILL %T2_W, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	R600_ExportSwz %T1_XYZW<kill>, 1, 60, 0, 1, 2, 3, 40, 0
	R600_ExportSwz %T2_XYZW<kill>, 2, 0, 0, 1, 2, 3, 40, 1
	CF_END_R600

# End machine code for function main.


===== SHADER #18 =========================================== VS/RV670/R600 =====
===== 8 dw ===== 3 gprs ===== 1 stack ==========================================
0000  00000000 89800000 CALL_FS @0
0002  c000a03c 94400688 EXPORT_DONE        POS   60   R1.xyzw  VPM
0004  c0014000 94600688 EXPORT_DONE        PARAM 0    R2.xyzw  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
VERT
DCL IN[0]
DCL OUT[0], POSITION
  0: MOV OUT[0], IN[0]
  1: END
STREAMOUT
  0: MEM_STREAM0_BUF0[0..0] <- OUT[0].x
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 4)
  %1 = call float @llvm.R600.load.input(i32 5)
  %2 = call float @llvm.R600.load.input(i32 6)
  %3 = call float @llvm.R600.load.input(i32 7)
  %4 = insertelement <4 x float> undef, float %0, i32 0
  %5 = insertelement <4 x float> %4, float %1, i32 1
  %6 = insertelement <4 x float> %5, float %2, i32 2
  %7 = insertelement <4 x float> %6, float %3, i32 3
  call void @llvm.R600.store.stream.output(<4 x float> %7, i32 0, i32 0, i32 1)
  %8 = insertelement <4 x float> undef, float %0, i32 0
  %9 = insertelement <4 x float> %8, float %1, i32 1
  %10 = insertelement <4 x float> %9, float %2, i32 2
  %11 = insertelement <4 x float> %10, float %3, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1)
  call void @llvm.R600.store.dummy(i32 2)
  ret void
}

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)

declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

declare void @llvm.R600.store.dummy(i32)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3

BB#0: derived from LLVM BB %main_body
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
	CF_CALL_FS_R600
	%T1_X<def> = KILL %T1_X, %T1_XYZW<imp-def>
	%T1_Y<def> = KILL %T1_Y, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T1_Z<def> = KILL %T1_Z, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	%T1_W<def> = KILL %T1_W, %T1_XYZW<imp-use,kill>, %T1_XYZW<imp-def>
	R600_ExportBuf %T1_XYZW, 0, 0, 4095, 1, 32, 0
	R600_ExportSwz %T1_XYZW<kill>, 1, 60, 0, 1, 2, 3, 40, 0
	R600_ExportSwz %T0_XYZW<undef>, 2, 0, 7, 7, 7, 7, 40, 1
	CF_END_R600
	PAD

# End machine code for function main.


===== SHADER #19 =========================================== VS/RV670/R600 =====
===== 12 dw ===== 2 gprs ===== 1 stack =========================================
0000  00000000 89800000 CALL_FS @0
0002  00008000 90401fff MEM_STREAM0        WRITE 0    R1.x___  ES:0  VPM
0004  c000a03c 94400688 EXPORT_DONE        POS   60   R1.xyzw  VPM
0006  c0004000 94600fff EXPORT_DONE        PARAM 0    R0.____  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
FRAG
DCL IN[0], GENERIC[0], CONSTANT
  0: END
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 0)
  %1 = call float @llvm.R600.load.input(i32 1)
  %2 = call float @llvm.R600.load.input(i32 2)
  %3 = call float @llvm.R600.load.input(i32 3)
  call void @llvm.R600.store.dummy(i32 0)
  ret void
}

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

declare void @llvm.R600.store.dummy(i32)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness

BB#0: derived from LLVM BB %main_body
	R600_ExportSwz %T0_XYZW<undef>, 0, 0, 7, 7, 7, 7, 40, 1
	CF_END_R600

# End machine code for function main.


===== SHADER #20 =========================================== PS/RV670/R600 =====
===== 4 dw ===== 1 gprs ===== 1 stack ==========================================
0000  c0000000 94600fff EXPORT_DONE        PIXEL 0    R0.____  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
Running synchronized to the vertical refresh.  The framerate should be
approximately the same as the monitor refresh rate.
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], COLOR, COLOR
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 0)
  %1 = call float @llvm.R600.load.input(i32 1)
  %2 = call float @llvm.R600.load.input(i32 2)
  %3 = call float @llvm.R600.load.input(i32 3)
  %4 = insertelement <4 x float> undef, float %0, i32 0
  %5 = insertelement <4 x float> %4, float %1, i32 1
  %6 = insertelement <4 x float> %5, float %2, i32 2
  %7 = insertelement <4 x float> %6, float %3, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %7, i32 0, i32 0)
  ret void
}

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T0_W in %vreg0, %T0_Z in %vreg1, %T0_Y in %vreg2, %T0_X in %vreg3

BB#0: derived from LLVM BB %main_body
    Live Ins: %T0_W %T0_Z %T0_Y %T0_X
	%T0_X<def> = KILL %T0_X, %T0_XYZW<imp-def>
	%T0_Y<def> = KILL %T0_Y, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T0_Z<def> = KILL %T0_Z, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T0_W<def> = KILL %T0_W, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	R600_ExportSwz %T0_XYZW<kill>, 0, 0, 0, 1, 2, 3, 40, 1
	CF_END_R600

# End machine code for function main.


===== SHADER #21 =========================================== PS/RV670/R600 =====
===== 4 dw ===== 1 gprs ===== 1 stack ==========================================
0000  c0000000 94600688 EXPORT_DONE        PIXEL 0    R0.xyzw  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL IN[5]
DCL IN[6]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
DCL CONST[0..10]
DCL TEMP[0..6]
  0: MUL TEMP[0], IN[0].xxxx, CONST[0]
  1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
  2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
  3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
  4: DP3 TEMP[1].x, IN[1], IN[1]
  5: RSQ TEMP[1].x, TEMP[1]
  6: MUL TEMP[0], IN[1], TEMP[1].xxxx
  7: MOV TEMP[2].w, IN[6].xxxx
  8: MOV TEMP[3], IN[3]
  9: MAD TEMP[3].xyz, CONST[4], IN[2], IN[5]
 10: MOV_SAT OUT[1], TEMP[3]
 11: DP3 TEMP[2].x, TEMP[0], CONST[5]
 12: DP3 TEMP[2].y, TEMP[0], CONST[7]
 13: MUL TEMP[4], CONST[8], IN[2]
 14: MUL TEMP[5], CONST[9], IN[3]
 15: MUL TEMP[6], CONST[10], IN[4]
 16: LIT TEMP[1], TEMP[2]
 17: ADD TEMP[3], TEMP[4], TEMP[3]
 18: MAD TEMP[3], TEMP[1].yyyy, TEMP[5], TEMP[3]
 19: MAD_SAT OUT[1].xyz, TEMP[1].zzzz, TEMP[6], TEMP[3]
 20: END
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 4)
  %1 = call float @llvm.R600.load.input(i32 5)
  %2 = call float @llvm.R600.load.input(i32 6)
  %3 = call float @llvm.R600.load.input(i32 7)
  %4 = call float @llvm.R600.load.input(i32 8)
  %5 = call float @llvm.R600.load.input(i32 9)
  %6 = call float @llvm.R600.load.input(i32 10)
  %7 = call float @llvm.R600.load.input(i32 11)
  %8 = call float @llvm.R600.load.input(i32 12)
  %9 = call float @llvm.R600.load.input(i32 13)
  %10 = call float @llvm.R600.load.input(i32 14)
  %11 = call float @llvm.R600.load.input(i32 15)
  %12 = call float @llvm.R600.load.input(i32 16)
  %13 = call float @llvm.R600.load.input(i32 17)
  %14 = call float @llvm.R600.load.input(i32 18)
  %15 = call float @llvm.R600.load.input(i32 19)
  %16 = call float @llvm.R600.load.input(i32 20)
  %17 = call float @llvm.R600.load.input(i32 21)
  %18 = call float @llvm.R600.load.input(i32 22)
  %19 = call float @llvm.R600.load.input(i32 23)
  %20 = call float @llvm.R600.load.input(i32 24)
  %21 = call float @llvm.R600.load.input(i32 25)
  %22 = call float @llvm.R600.load.input(i32 26)
  %23 = call float @llvm.R600.load.input(i32 27)
  %24 = call float @llvm.R600.load.input(i32 28)
  %25 = call float @llvm.R600.load.input(i32 29)
  %26 = call float @llvm.R600.load.input(i32 30)
  %27 = call float @llvm.R600.load.input(i32 31)
  %28 = load <4 x float> addrspace(8)* null
  %29 = extractelement <4 x float> %28, i32 0
  %30 = fmul float %0, %29
  %31 = load <4 x float> addrspace(8)* null
  %32 = extractelement <4 x float> %31, i32 1
  %33 = fmul float %0, %32
  %34 = load <4 x float> addrspace(8)* null
  %35 = extractelement <4 x float> %34, i32 2
  %36 = fmul float %0, %35
  %37 = load <4 x float> addrspace(8)* null
  %38 = extractelement <4 x float> %37, i32 3
  %39 = fmul float %0, %38
  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %41 = extractelement <4 x float> %40, i32 0
  %42 = fmul float %1, %41
  %43 = fadd float %42, %30
  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %45 = extractelement <4 x float> %44, i32 1
  %46 = fmul float %1, %45
  %47 = fadd float %46, %33
  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %49 = extractelement <4 x float> %48, i32 2
  %50 = fmul float %1, %49
  %51 = fadd float %50, %36
  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %53 = extractelement <4 x float> %52, i32 3
  %54 = fmul float %1, %53
  %55 = fadd float %54, %39
  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %57 = extractelement <4 x float> %56, i32 0
  %58 = fmul float %2, %57
  %59 = fadd float %58, %43
  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %61 = extractelement <4 x float> %60, i32 1
  %62 = fmul float %2, %61
  %63 = fadd float %62, %47
  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %65 = extractelement <4 x float> %64, i32 2
  %66 = fmul float %2, %65
  %67 = fadd float %66, %51
  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %69 = extractelement <4 x float> %68, i32 3
  %70 = fmul float %2, %69
  %71 = fadd float %70, %55
  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %73 = extractelement <4 x float> %72, i32 0
  %74 = fmul float %3, %73
  %75 = fadd float %74, %59
  %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %77 = extractelement <4 x float> %76, i32 1
  %78 = fmul float %3, %77
  %79 = fadd float %78, %63
  %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %81 = extractelement <4 x float> %80, i32 2
  %82 = fmul float %3, %81
  %83 = fadd float %82, %67
  %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %85 = extractelement <4 x float> %84, i32 3
  %86 = fmul float %3, %85
  %87 = fadd float %86, %71
  %88 = insertelement <4 x float> undef, float %4, i32 0
  %89 = insertelement <4 x float> %88, float %5, i32 1
  %90 = insertelement <4 x float> %89, float %6, i32 2
  %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3
  %92 = insertelement <4 x float> undef, float %4, i32 0
  %93 = insertelement <4 x float> %92, float %5, i32 1
  %94 = insertelement <4 x float> %93, float %6, i32 2
  %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3
  %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95)
  %97 = call float @fabs(float %96)
  %98 = call float @llvm.AMDGPU.rsq(float %97)
  %99 = fmul float %4, %98
  %100 = fmul float %5, %98
  %101 = fmul float %6, %98
  %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %103 = extractelement <4 x float> %102, i32 0
  %104 = fmul float %103, %8
  %105 = fadd float %104, %20
  %106 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %107 = extractelement <4 x float> %106, i32 1
  %108 = fmul float %107, %9
  %109 = fadd float %108, %21
  %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %111 = extractelement <4 x float> %110, i32 2
  %112 = fmul float %111, %10
  %113 = fadd float %112, %22
  %114 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00)
  %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00)
  %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00)
  %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
  %118 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %119 = extractelement <4 x float> %118, i32 0
  %120 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %121 = extractelement <4 x float> %120, i32 1
  %122 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %123 = extractelement <4 x float> %122, i32 2
  %124 = insertelement <4 x float> undef, float %99, i32 0
  %125 = insertelement <4 x float> %124, float %100, i32 1
  %126 = insertelement <4 x float> %125, float %101, i32 2
  %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 3
  %128 = insertelement <4 x float> undef, float %119, i32 0
  %129 = insertelement <4 x float> %128, float %121, i32 1
  %130 = insertelement <4 x float> %129, float %123, i32 2
  %131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3
  %132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131)
  %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
  %134 = extractelement <4 x float> %133, i32 0
  %135 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
  %136 = extractelement <4 x float> %135, i32 1
  %137 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
  %138 = extractelement <4 x float> %137, i32 2
  %139 = insertelement <4 x float> undef, float %99, i32 0
  %140 = insertelement <4 x float> %139, float %100, i32 1
  %141 = insertelement <4 x float> %140, float %101, i32 2
  %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 3
  %143 = insertelement <4 x float> undef, float %134, i32 0
  %144 = insertelement <4 x float> %143, float %136, i32 1
  %145 = insertelement <4 x float> %144, float %138, i32 2
  %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3
  %147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146)
  %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %149 = extractelement <4 x float> %148, i32 0
  %150 = fmul float %149, %8
  %151 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %152 = extractelement <4 x float> %151, i32 1
  %153 = fmul float %152, %9
  %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %155 = extractelement <4 x float> %154, i32 2
  %156 = fmul float %155, %10
  %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %158 = extractelement <4 x float> %157, i32 0
  %159 = fmul float %158, %12
  %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %161 = extractelement <4 x float> %160, i32 1
  %162 = fmul float %161, %13
  %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %164 = extractelement <4 x float> %163, i32 2
  %165 = fmul float %164, %14
  %166 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %167 = extractelement <4 x float> %166, i32 0
  %168 = fmul float %167, %16
  %169 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %170 = extractelement <4 x float> %169, i32 1
  %171 = fmul float %170, %17
  %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %173 = extractelement <4 x float> %172, i32 2
  %174 = fmul float %173, %18
  %175 = fcmp uge float %132, 0.000000e+00
  %176 = select i1 %175, float %132, float 0.000000e+00
  %177 = fcmp uge float %147, 0.000000e+00
  %178 = select i1 %177, float %147, float 0.000000e+00
  %179 = call float @llvm.pow.f32(float %178, float %24)
  %180 = fcmp ult float %132, 0.000000e+00
  %181 = select i1 %180, float 0.000000e+00, float %179
  %182 = fadd float %150, %105
  %183 = fadd float %153, %109
  %184 = fadd float %156, %113
  %185 = fmul float %176, %159
  %186 = fadd float %185, %182
  %187 = fmul float %176, %162
  %188 = fadd float %187, %183
  %189 = fmul float %176, %165
  %190 = fadd float %189, %184
  %191 = fmul float %181, %168
  %192 = fadd float %191, %186
  %193 = fmul float %181, %171
  %194 = fadd float %193, %188
  %195 = fmul float %181, %174
  %196 = fadd float %195, %190
  %197 = call float @llvm.AMDIL.clamp.(float %192, float 0.000000e+00, float 1.000000e+00)
  %198 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00)
  %199 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00)
  %200 = insertelement <4 x float> undef, float %75, i32 0
  %201 = insertelement <4 x float> %200, float %79, i32 1
  %202 = insertelement <4 x float> %201, float %83, i32 2
  %203 = insertelement <4 x float> %202, float %87, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %203, i32 60, i32 1)
  %204 = insertelement <4 x float> undef, float %197, i32 0
  %205 = insertelement <4 x float> %204, float %198, i32 1
  %206 = insertelement <4 x float> %205, float %199, i32 2
  %207 = insertelement <4 x float> %206, float %117, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %207, i32 0, i32 2)
  ret void
}

; External functions and attribute groups for the shader module whose @main
; ends just above. Notes on lowering are read off the machine-code dump and
; disassembly that follow in this log, not from the backend sources.

; Takes an i32 and returns one float.
; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

; Takes two <4 x float> operands and returns a single float; shows up as the
; four-slot DOT4 bundles in the dumps below.
; Function Attrs: readnone
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1

; Called on the dp4 result before rsq; in the disassembly this appears as the
; |PV.x| source modifier rather than a separate instruction.
; Function Attrs: readonly
declare float @fabs(float) #2

; Shows up as RECIPSQRT_CLAMPED in the dumps below.
; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #1

; Always called here with bounds 0.0 and 1.0; the disassembly shows the
; corresponding results as `_sat` instruction variants.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #1

; Appears in the dumps below as a LOG_IEEE / MUL / EXP_IEEE sequence.
; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Called with (vector, 60, 1) and (vector, 0, 2) in this module; those calls
; line up with `EXPORT_DONE POS 60` and `EXPORT_DONE PARAM 0` in the
; disassembly. Operand meaning is inferred from that correlation -- confirm
; against the backend before relying on it.
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

; #0 is the group attached to @main; the disassembly for this module is
; labelled VS, so "ShaderType"="1" presumably selects a vertex shader.
attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
attributes #2 = { readonly }
attributes #3 = { nounwind readonly }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T7_X in %vreg0, %T6_Z in %vreg1, %T6_Y in %vreg2, %T6_X in %vreg3, %T5_Z in %vreg4, %T5_Y in %vreg5, %T5_X in %vreg6, %T4_W in %vreg7, %T4_Z in %vreg8, %T4_Y in %vreg9, %T4_X in %vreg10, %T3_Z in %vreg11, %T3_Y in %vreg12, %T3_X in %vreg13, %T2_Z in %vreg14, %T2_Y in %vreg15, %T2_X in %vreg16, %T1_W in %vreg17, %T1_Z in %vreg18, %T1_Y in %vreg19, %T1_X in %vreg20

BB#0: derived from LLVM BB %main_body
    Live Ins: %T7_X %T6_Z %T6_Y %T6_X %T5_Z %T5_Y %T5_X %T4_W %T4_Z %T4_Y %T4_X %T3_Z %T3_Y %T3_X %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X
	CF_CALL_FS_R600
	%T2_X<def> = KILL %T2_X, %T2_XYZW<imp-def>
	%T2_Y<def> = KILL %T2_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T2_Z<def> = KILL %T2_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	CF_ALU 0, 0, 0, 2, 0, 0, 0, 67
	%T0_X<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_X, 0, 0, 0, 2048, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Y, 0, 0, 0, 2049, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Z, 0, 0, 0, 2050, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_W<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	BUNDLE %T0_X<imp-def>, %T0_Y<imp-def>, %T0_Z<imp-def>, %T0_W<imp-def>, %T2_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T2_Y<imp-use>, %T2_Z<imp-use>, %T2_W<imp-use>
	  * %T0_X<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T0_Y<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T2_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T0_Z<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T2_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T0_W<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T8_X<def> = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %T0_W<kill>, 0, 0, 1, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T7_Y<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_X, 0, 0, 2052, %T0_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T7_Z<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Y, 0, 0, 2053, %T0_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_W<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Z, 0, 0, 2054, %T0_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_X<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_X, 0, 0, 2056, %T7_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T7_Y<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Y, 0, 0, 2057, %T7_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T8_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T5_W<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Z, 0, 0, 2058, %T0_W<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_X<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_X, 0, 0, 2060, %T0_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-def>
	%T2_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T8_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T7_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %T4_Z<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_X<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T8_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T5_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %T5_Y<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T4_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %T5_Z<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %T5_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_X<def> = MULADD_IEEE_r600 0, 0, %KC0_132_X, 0, 0, 2064, %T3_X, 0, 0, -1, %T6_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T6_Y<def> = MULADD_IEEE_r600 0, 0, %KC0_132_Y, 0, 0, 2065, %T3_Y, 0, 0, -1, %T6_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_Z<def> = MULADD_IEEE_r600 0, 0, %KC0_132_Z, 0, 0, 2066, %T3_Z, 0, 0, -1, %T6_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_X<def> = MULADD_IEEE_r600 0, 0, %KC0_136_X, 0, 0, 2080, %T3_X<kill>, 0, 0, -1, %T5_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T6_Y<def> = MULADD_IEEE_r600 0, 0, %KC0_136_Y, 0, 0, 2081, %T3_Y<kill>, 0, 0, -1, %T6_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_Z<def> = MULADD_IEEE_r600 0, 0, %KC0_136_Z, 0, 0, 2082, %T3_Z<kill>, 0, 0, -1, %T5_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_X<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X<kill>, 0, 0, 0, -1, %KC0_128_W, 0, 0, 0, 2051, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %T4_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %T4_Y<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_X<def> = MULADD_IEEE_r600 0, 0, %T1_Y<kill>, 0, 0, -1, %KC0_129_W, 0, 0, 2055, %T1_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T9_Z<def> = MOV 1, 0, 0, 0, %KC0_135_Z, 0, 0, 0, 2078, 1, pred:%PRED_SEL_OFF, 0, 0, %T9_XYZW<imp-def>
	%T1_X<def> = MULADD_IEEE_r600 0, 0, %T1_Z<kill>, 0, 0, -1, %KC0_130_W, 0, 0, 2059, %T1_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T9_Y<def> = MOV 1, 0, 0, 0, %KC0_135_Y, 0, 0, 0, 2077, 1, pred:%PRED_SEL_OFF, 0, 0, %T9_XYZW<imp-use,kill>, %T9_XYZW<imp-def>
	%T9_X<def> = MOV 1, 0, 0, 0, %KC0_135_X, 0, 0, 0, 2076, 1, pred:%PRED_SEL_OFF, 0, 0, %T9_XYZW<imp-use,kill>, %T9_XYZW<imp-def>
	%T8_Z<def> = MOV 1, 0, 0, 0, %KC0_133_Z, 0, 0, 0, 2070, 1, pred:%PRED_SEL_OFF, 0, 0, %T8_XYZW<imp-def>
	%T8_Y<def> = MOV 1, 0, 0, 0, %KC0_133_Y, 0, 0, 0, 2069, 1, pred:%PRED_SEL_OFF, 0, 0, %T8_XYZW<imp-use,kill>, %T8_XYZW<imp-def>
	%T9_W<def> = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T9_XYZW<imp-def>
	BUNDLE %T1_X<imp-def>, %T1_Y<imp-def>, %T1_Z<imp-def>, %T1_W<imp-def>, %T2_X<imp-use>, %T9_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T2_Y<imp-use>, %T9_Y<imp-use>, %T2_Z<imp-use>, %T9_Z<imp-use>, %T2_W<imp-use>, %T9_W<imp-use>
	  * %T1_X<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T9_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Y<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T9_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Z<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T9_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_W<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T9_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T4_X<def> = CNDGE_r600 0, 0, %T1_Y<kill>, 0, 0, -1, %T1_Y, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_W<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_W, 0, 0, 2063, %T1_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T1_X<def> = LOG_IEEE_r600 1, 0, 0, 0, %T4_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Z<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Z, 0, 0, 2062, %T5_W<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T1_X<def> = MUL 0, 0, 1, 0, 0, 0, %T7_X<kill>, 0, 0, 0, -1, %T1_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Y<def> = MULADD_IEEE_r600 0, 0, %T1_W<kill>, 0, 0, -1, %KC0_131_Y, 0, 0, 2061, %T7_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T1_X<def> = EXP_IEEE_r600 1, 0, 0, 0, %T1_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T8_X<def> = MOV 1, 0, 0, 0, %KC0_133_X, 0, 0, 0, 2068, 1, pred:%PRED_SEL_OFF, 0, 0, %T8_XYZW<imp-use,kill>, %T8_XYZW<imp-def>
	%T8_W<def> = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T8_XYZW<imp-def>
	BUNDLE %T1_X<imp-def>, %T1_Y<imp-def>, %T1_Z<imp-def>, %T1_W<imp-def>, %T2_X<imp-use>, %T8_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T2_Y<imp-use>, %T8_Y<imp-use>, %T2_Z<imp-use>, %T8_Z<imp-use>, %T2_W<imp-use>, %T8_W<imp-use>
	  * %T1_X<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T8_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Y<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T8_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Z<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T8_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_W<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T8_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_X<def> = CNDGE_r600 0, 0, %T1_Y, 0, 0, -1, %T1_X<kill>, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_Y<def> = CNDGE_r600 0, 0, %T1_Y<kill>, 0, 0, -1, %T1_Y, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_X<def> = MULADD_IEEE_r600 0, 0, %T2_Y, 0, 0, -1, %T7_Z<kill>, 0, 0, -1, %T3_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_Y<def> = MULADD_IEEE_r600 0, 0, %T2_Y, 0, 0, -1, %T5_Z<kill>, 0, 0, -1, %T6_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_Z<def> = MULADD_IEEE_r600 0, 0, %T2_Y<kill>, 0, 0, -1, %T3_Y<kill>, 0, 0, -1, %T3_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_Y<def> = MULADD_IEEE_r600 0, 1, %T1_X, 0, 0, -1, %T5_Y<kill>, 0, 0, -1, %T1_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW<imp-def>
	%T5_Z<def> = MULADD_IEEE_r600 0, 1, %T1_X, 0, 0, -1, %T4_Z<kill>, 0, 0, -1, %T2_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW<imp-use,kill>, %T5_XYZW<imp-def>
	%T5_X<def> = MULADD_IEEE_r600 0, 1, %T1_X<kill>, 0, 0, -1, %T3_W<kill>, 0, 0, -1, %T1_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW<imp-use,kill>, %T5_XYZW<imp-def>
	%T5_W<def> = MOV 1, 0, 0, 1, %T4_W<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW<imp-use,kill>, %T5_XYZW<imp-def>
	R600_ExportSwz %T0_XYZW<kill>, 1, 60, 0, 1, 2, 3, 40, 0
	R600_ExportSwz %T5_XYZW<kill>, 2, 0, 0, 1, 2, 3, 40, 1
	CF_END_R600
	PAD

# End machine code for function main.


===== SHADER #22 =========================================== VS/RV670/R600 =====
===== 144 dw ===== 10 gprs ===== 1 stack =======================================
0000  00000000 89800000 CALL_FS @0
0002  80000006 a1040000 ALU 66 @12 KC0[CB0:0-31]
 0012  00100001 00000210     1      x: MUL_IEEE           R0.x,  R1.x, KC0[0].x
 0014  00900001 20000210            y: MUL_IEEE           R0.y,  R1.x, KC0[0].y
 0016  01100001 40000210            z: MUL_IEEE           R0.z,  R1.x, KC0[0].z
 0018  800000f8 60401910            w: MOV                R2.w,  0
 0020  00004002 00005000     2      x: DOT4               __.x,  R2.x, R2.x
 0022  00804402 20005000            y: DOT4               __.y,  R2.y, R2.y
 0024  01004802 40005000            z: DOT4               __.z,  R2.z, R2.z
 0026  819fccfe 60005010            w: DOT4               R0.w,  PV.w, PV.w
 0028  00102401 20e28000     3      y: MULADD_IEEE        R7.y,  R1.y, KC0[1].x, R0.x
 0030  00902401 40e28400            z: MULADD_IEEE        R7.z,  R1.y, KC0[1].y, R0.y
 0032  01102401 60028800            w: MULADD_IEEE        R0.w,  R1.y, KC0[1].z, R0.z
 0034  800000fe 01006711            t: RECIPSQRT_CLAMPED  R8.x,  |PV.x|
 0036  00104801 000284fe     4      x: MULADD_IEEE        R0.x,  R1.z, KC0[2].x, PV.y
 0038  00904801 20e288fe            y: MULADD_IEEE        R7.y,  R1.z, KC0[2].y, PV.z
 0040  001fe802 40480210            z: MUL_IEEE           R2.z,  R2.z, PS               VEC_120
 0042  81104801 60a28cfe            w: MULADD_IEEE        R5.w,  R1.z, KC0[2].z, PV.w
 0044  00106c01 000280fe     5      x: MULADD_IEEE        R0.x,  R1.w, KC0[3].x, PV.x
 0046  00010402 20400210            y: MUL_IEEE           R2.y,  R2.y, R8.x
 0048  01008889 40e00210            z: MUL_IEEE           R7.z,  KC0[9].z, R4.z
 0050  80010002 00400210            t: MUL_IEEE           R2.x,  R2.x, R8.x
 0052  00006084 00aa8006     6      x: MULADD_IEEE        R5.x,  KC0[4].x, R3.x, R6.x   VEC_120
 0054  0080a48a 20a00210            y: MUL_IEEE           R5.y,  KC0[10].y, R5.y
 0056  0100a88a 40800210            z: MUL_IEEE           R4.z,  KC0[10].z, R5.z
 0058  8000a08a 60600210            w: MUL_IEEE           R3.w,  KC0[10].x, R5.x
 0060  00006088 006280fe     7      x: MULADD_IEEE        R3.x,  KC0[8].x, R3.x, PV.x
 0062  00806484 20c28406            y: MULADD_IEEE        R6.y,  KC0[4].y, R3.y, R6.y
 0064  81006884 40a28806            z: MULADD_IEEE        R5.z,  KC0[4].z, R3.z, R6.z
 0066  01900001 00200210     8      x: MUL_IEEE           R1.x,  R1.x, KC0[0].w
 0068  00806488 20c284fe            y: MULADD_IEEE        R6.y,  KC0[8].y, R3.y, PV.y
 0070  01006888 406288fe            z: MULADD_IEEE        R3.z,  KC0[8].z, R3.z, PV.z
 0072  80008089 20600210            t: MUL_IEEE           R3.y,  KC0[9].x, R4.x
 0074  01902401 002280fe     9      x: MULADD_IEEE        R1.x,  R1.y, KC0[1].w, PV.x
 0076  00808489 40a00210            z: MUL_IEEE           R5.z,  KC0[9].y, R4.y
 0078  80000887 41201910            t: MOV                R9.z,  KC0[7].z
 0080  01904801 002280fe    10      x: MULADD_IEEE        R1.x,  R1.z, KC0[2].w, PV.x
 0082  00000487 21201910            y: MOV                R9.y,  KC0[7].y
 0084  00000885 41001910            z: MOV                R8.z,  KC0[5].z
 0086  80000087 01201910            t: MOV                R9.x,  KC0[7].x
 0088  00000485 21001910    11      y: MOV                R8.y,  KC0[5].y
 0090  80000c02 61201910            w: MOV                R9.w,  R2.w
 0092  00012002 00205000    12      x: DOT4               __.x,  R2.x, R9.x
 0094  00812402 20205010            y: DOT4               R1.y,  R2.y, R9.y
 0096  01012802 40205000            z: DOT4               __.z,  R2.z, R9.z
 0098  819fcc02 60205000            w: DOT4               __.w,  R2.w, PV.w
 0100  001fc0fe 008340f8    13      x: CNDGE              R4.x,  PV.x, PV.x, 0
 0102  81906c01 60028001            w: MULADD_IEEE        R0.w,  R1.w, KC0[3].w, R1.x
 0104  01106c01 40028c05    14      z: MULADD_IEEE        R0.z,  R1.w, KC0[3].z, R5.w
 0106  800000fe 00206310            t: LOG_IEEE           R1.x,  PV.x
 0108  001fe007 00200110    15      x: MUL                R1.x,  R7.x, PS
 0110  80906c01 20028407            y: MULADD_IEEE        R0.y,  R1.w, KC0[3].y, R7.y
 0112  00000085 01001910    16      x: MOV                R8.x,  KC0[5].x
 0114  00000c02 61001910            w: MOV                R8.w,  R2.w
 0116  800000fe 00206110            t: EXP_IEEE           R1.x,  PV.x
 0118  001fc002 00205000    17      x: DOT4               __.x,  R2.x, PV.x
 0120  00810402 20205010            y: DOT4               R1.y,  R2.y, R8.y
 0122  01010802 40205000            z: DOT4               __.z,  R2.z, R8.z
 0124  819fcc02 60205000            w: DOT4               __.w,  R2.w, PV.w
 0126  000020fe 002340f8    18      x: CNDGE              R1.x,  PV.x, R1.x, 0
 0128  801fc0fe 204340f8            y: CNDGE              R2.y,  PV.x, PV.x, 0
 0130  0100e4fe 004a8803    19      x: MULADD_IEEE        R2.x,  PV.y, R7.z, R3.z       VEC_120
 0132  0100a4fe 20228406            y: MULADD_IEEE        R1.y,  PV.y, R5.z, R6.y
 0134  808064fe 40228003            z: MULADD_IEEE        R1.z,  PV.y, R3.y, R3.x
 0136  01806001 80a288fe    20      x: MULADD_IEEE_sat    R5.x,  R1.x, R3.w, PV.z
 0138  0080a001 a0a284fe            y: MULADD_IEEE_sat    R5.y,  R1.x, R5.y, PV.y
 0140  01008001 c0a280fe            z: MULADD_IEEE_sat    R5.z,  R1.x, R4.z, PV.x
 0142  80000c04 e0a01910            w: MOV_sat            R5.w,  R4.w
0004  c000203c 94400688 EXPORT_DONE        POS   60   R0.xyzw  VPM
0006  c002c000 94600688 EXPORT_DONE        PARAM 0    R5.xyzw  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

; Pixel-shader entry point: the LLVM IR produced for the TGSI FRAG program
; dumped just above (`MOV OUT[0], IN[0]`). It reads four scalar inputs, packs
; them into one <4 x float> and passes that vector to the export intrinsic
; without modifying it.
define void @main() #0 {
main_body:
  ; Fetch input slots 0..3, one float each. The machine-code dump that follows
  ; lists T0_X..T0_W as live-ins, so these are presumably the x/y/z/w channels
  ; of IN[0].
  %0 = call float @llvm.R600.load.input(i32 0)
  %1 = call float @llvm.R600.load.input(i32 1)
  %2 = call float @llvm.R600.load.input(i32 2)
  %3 = call float @llvm.R600.load.input(i32 3)
  ; Rebuild the 4-component vector: lane i receives input slot i.
  %4 = insertelement <4 x float> undef, float %0, i32 0
  %5 = insertelement <4 x float> %4, float %1, i32 1
  %6 = insertelement <4 x float> %5, float %2, i32 2
  %7 = insertelement <4 x float> %6, float %3, i32 3
  ; Export the vector. With operands (0, 0) this ends up as
  ; `EXPORT_DONE PIXEL 0 R0.xyzw` in the disassembly below; the operand
  ; meaning (index, then target) is inferred from this log -- confirm against
  ; the backend.
  call void @llvm.R600.store.swizzle(<4 x float> %7, i32 0, i32 0)
  ret void
}

; The two external functions called by @main above, plus its attribute groups.

; Takes an i32 slot index and returns one float; @main calls it with 0..3.
; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

; Receives the assembled <4 x float> and two i32 operands; @main passes (0, 0).
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

; #0 is the group attached to @main; the disassembly for this module is
; labelled PS, so "ShaderType"="0" presumably selects a pixel shader.
attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T0_W in %vreg0, %T0_Z in %vreg1, %T0_Y in %vreg2, %T0_X in %vreg3

BB#0: derived from LLVM BB %main_body
    Live Ins: %T0_W %T0_Z %T0_Y %T0_X
	%T0_X<def> = KILL %T0_X, %T0_XYZW<imp-def>
	%T0_Y<def> = KILL %T0_Y, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T0_Z<def> = KILL %T0_Z, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T0_W<def> = KILL %T0_W, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	R600_ExportSwz %T0_XYZW<kill>, 0, 0, 0, 1, 2, 3, 40, 1
	CF_END_R600

# End machine code for function main.


===== SHADER #23 =========================================== PS/RV670/R600 =====
===== 4 dw ===== 1 gprs ===== 1 stack ==========================================
0000  c0000000 94600688 EXPORT_DONE        PIXEL 0    R0.xyzw  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
DCL CONST[0..12]
DCL TEMP[0..3]
  0: MUL TEMP[0], IN[0].xxxx, CONST[0]
  1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
  2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
  3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
  4: DP3 TEMP[1].x, CONST[4], CONST[4]
  5: RSQ TEMP[1].x, TEMP[1]
  6: MUL TEMP[0], CONST[4], TEMP[1].xxxx
  7: MOV TEMP[2].w, CONST[5].xxxx
  8: MOV TEMP[3], CONST[6]
  9: MOV_SAT OUT[1], TEMP[3]
 10: DP3 TEMP[2].x, TEMP[0], CONST[7]
 11: DP3 TEMP[2].y, TEMP[0], CONST[9]
 12: LIT TEMP[1], TEMP[2]
 13: ADD TEMP[3], CONST[10], TEMP[3]
 14: MAD TEMP[3], TEMP[1].yyyy, CONST[11], TEMP[3]
 15: MAD_SAT OUT[1].xyz, TEMP[1].zzzz, CONST[12], TEMP[3]
 16: END
; ModuleID = 'tgsi'

define void @main() #0 {
main_body:
  %0 = call float @llvm.R600.load.input(i32 4)
  %1 = call float @llvm.R600.load.input(i32 5)
  %2 = call float @llvm.R600.load.input(i32 6)
  %3 = call float @llvm.R600.load.input(i32 7)
  %4 = load <4 x float> addrspace(8)* null
  %5 = extractelement <4 x float> %4, i32 0
  %6 = fmul float %0, %5
  %7 = load <4 x float> addrspace(8)* null
  %8 = extractelement <4 x float> %7, i32 1
  %9 = fmul float %0, %8
  %10 = load <4 x float> addrspace(8)* null
  %11 = extractelement <4 x float> %10, i32 2
  %12 = fmul float %0, %11
  %13 = load <4 x float> addrspace(8)* null
  %14 = extractelement <4 x float> %13, i32 3
  %15 = fmul float %0, %14
  %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %17 = extractelement <4 x float> %16, i32 0
  %18 = fmul float %1, %17
  %19 = fadd float %18, %6
  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %21 = extractelement <4 x float> %20, i32 1
  %22 = fmul float %1, %21
  %23 = fadd float %22, %9
  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %25 = extractelement <4 x float> %24, i32 2
  %26 = fmul float %1, %25
  %27 = fadd float %26, %12
  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %29 = extractelement <4 x float> %28, i32 3
  %30 = fmul float %1, %29
  %31 = fadd float %30, %15
  %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %33 = extractelement <4 x float> %32, i32 0
  %34 = fmul float %2, %33
  %35 = fadd float %34, %19
  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %37 = extractelement <4 x float> %36, i32 1
  %38 = fmul float %2, %37
  %39 = fadd float %38, %23
  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %41 = extractelement <4 x float> %40, i32 2
  %42 = fmul float %2, %41
  %43 = fadd float %42, %27
  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %45 = extractelement <4 x float> %44, i32 3
  %46 = fmul float %2, %45
  %47 = fadd float %46, %31
  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %49 = extractelement <4 x float> %48, i32 0
  %50 = fmul float %3, %49
  %51 = fadd float %50, %35
  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %53 = extractelement <4 x float> %52, i32 1
  %54 = fmul float %3, %53
  %55 = fadd float %54, %39
  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %57 = extractelement <4 x float> %56, i32 2
  %58 = fmul float %3, %57
  %59 = fadd float %58, %43
  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %61 = extractelement <4 x float> %60, i32 3
  %62 = fmul float %3, %61
  %63 = fadd float %62, %47
  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %65 = extractelement <4 x float> %64, i32 0
  %66 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %67 = extractelement <4 x float> %66, i32 0
  %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %69 = extractelement <4 x float> %68, i32 1
  %70 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %71 = extractelement <4 x float> %70, i32 1
  %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %73 = extractelement <4 x float> %72, i32 2
  %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %75 = extractelement <4 x float> %74, i32 2
  %76 = insertelement <4 x float> undef, float %65, i32 0
  %77 = insertelement <4 x float> %76, float %69, i32 1
  %78 = insertelement <4 x float> %77, float %73, i32 2
  %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 3
  %80 = insertelement <4 x float> undef, float %67, i32 0
  %81 = insertelement <4 x float> %80, float %71, i32 1
  %82 = insertelement <4 x float> %81, float %75, i32 2
  %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 3
  %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83)
  %85 = call float @fabs(float %84)
  %86 = call float @llvm.AMDGPU.rsq(float %85)
  %87 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %88 = extractelement <4 x float> %87, i32 0
  %89 = fmul float %88, %86
  %90 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %91 = extractelement <4 x float> %90, i32 1
  %92 = fmul float %91, %86
  %93 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %94 = extractelement <4 x float> %93, i32 2
  %95 = fmul float %94, %86
  %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %97 = extractelement <4 x float> %96, i32 0
  %98 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
  %99 = extractelement <4 x float> %98, i32 0
  %100 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
  %101 = extractelement <4 x float> %100, i32 1
  %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
  %103 = extractelement <4 x float> %102, i32 2
  %104 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
  %105 = extractelement <4 x float> %104, i32 3
  %106 = call float @llvm.AMDIL.clamp.(float %99, float 0.000000e+00, float 1.000000e+00)
  %107 = call float @llvm.AMDIL.clamp.(float %101, float 0.000000e+00, float 1.000000e+00)
  %108 = call float @llvm.AMDIL.clamp.(float %103, float 0.000000e+00, float 1.000000e+00)
  %109 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00)
  %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
  %111 = extractelement <4 x float> %110, i32 0
  %112 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
  %113 = extractelement <4 x float> %112, i32 1
  %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
  %115 = extractelement <4 x float> %114, i32 2
  %116 = insertelement <4 x float> undef, float %89, i32 0
  %117 = insertelement <4 x float> %116, float %92, i32 1
  %118 = insertelement <4 x float> %117, float %95, i32 2
  %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3
  %120 = insertelement <4 x float> undef, float %111, i32 0
  %121 = insertelement <4 x float> %120, float %113, i32 1
  %122 = insertelement <4 x float> %121, float %115, i32 2
  %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3
  %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123)
  %125 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %126 = extractelement <4 x float> %125, i32 0
  %127 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %128 = extractelement <4 x float> %127, i32 1
  %129 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %130 = extractelement <4 x float> %129, i32 2
  %131 = insertelement <4 x float> undef, float %89, i32 0
  %132 = insertelement <4 x float> %131, float %92, i32 1
  %133 = insertelement <4 x float> %132, float %95, i32 2
  %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 3
  %135 = insertelement <4 x float> undef, float %126, i32 0
  %136 = insertelement <4 x float> %135, float %128, i32 1
  %137 = insertelement <4 x float> %136, float %130, i32 2
  %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3
  %139 = call float @llvm.AMDGPU.dp4(<4 x float> %134, <4 x float> %138)
  %140 = fcmp uge float %124, 0.000000e+00
  %141 = select i1 %140, float %124, float 0.000000e+00
  %142 = fcmp uge float %139, 0.000000e+00
  %143 = select i1 %142, float %139, float 0.000000e+00
  %144 = call float @llvm.pow.f32(float %143, float %97)
  %145 = fcmp ult float %124, 0.000000e+00
  %146 = select i1 %145, float 0.000000e+00, float %144
  %147 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %148 = extractelement <4 x float> %147, i32 0
  %149 = fadd float %148, %99
  %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %151 = extractelement <4 x float> %150, i32 1
  %152 = fadd float %151, %101
  %153 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %154 = extractelement <4 x float> %153, i32 2
  %155 = fadd float %154, %103
  %156 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
  %157 = extractelement <4 x float> %156, i32 0
  %158 = fmul float %141, %157
  %159 = fadd float %158, %149
  %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
  %161 = extractelement <4 x float> %160, i32 1
  %162 = fmul float %141, %161
  %163 = fadd float %162, %152
  %164 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
  %165 = extractelement <4 x float> %164, i32 2
  %166 = fmul float %141, %165
  %167 = fadd float %166, %155
  %168 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
  %169 = extractelement <4 x float> %168, i32 0
  %170 = fmul float %146, %169
  %171 = fadd float %170, %159
  %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
  %173 = extractelement <4 x float> %172, i32 1
  %174 = fmul float %146, %173
  %175 = fadd float %174, %163
  %176 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
  %177 = extractelement <4 x float> %176, i32 2
  %178 = fmul float %146, %177
  %179 = fadd float %178, %167
  %180 = call float @llvm.AMDIL.clamp.(float %171, float 0.000000e+00, float 1.000000e+00)
  %181 = call float @llvm.AMDIL.clamp.(float %175, float 0.000000e+00, float 1.000000e+00)
  %182 = call float @llvm.AMDIL.clamp.(float %179, float 0.000000e+00, float 1.000000e+00)
  %183 = insertelement <4 x float> undef, float %51, i32 0
  %184 = insertelement <4 x float> %183, float %55, i32 1
  %185 = insertelement <4 x float> %184, float %59, i32 2
  %186 = insertelement <4 x float> %185, float %63, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %186, i32 60, i32 1)
  %187 = insertelement <4 x float> undef, float %180, i32 0
  %188 = insertelement <4 x float> %187, float %181, i32 1
  %189 = insertelement <4 x float> %188, float %182, i32 2
  %190 = insertelement <4 x float> %189, float %109, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %190, i32 0, i32 2)
  ret void
}

; External declarations and attribute groups for the module that ends above.
; Only the call sites visible in the preceding function body are described
; here; anything else is left to the existing "Function Attrs" notes.

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

; Scalar result from two <4 x float> operands. The visible callers build both
; operands with a literal 0.0 in lane 3 before calling it.
; Function Attrs: readnone
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1

; Function Attrs: readonly
declare float @fabs(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #1

; Every visible call passes (value, 0.0, 1.0).
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #1

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

; Called at the end of the function with a <4 x float> and two i32 operands;
; the visible calls use (60, 1) and (0, 2).
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

; Attribute groups referenced by the "#N" suffixes above.
attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
attributes #2 = { readonly }
attributes #3 = { nounwind readonly }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3

BB#0: derived from LLVM BB %main_body
    Live Ins: %T1_W %T1_Z %T1_Y %T1_X
	CF_CALL_FS_R600
	CF_ALU 0, 0, 0, 2, 0, 0, 0, 63
	%T0_X<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_X, 0, 0, 0, 2048, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Y, 0, 0, 0, 2049, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Z, 0, 0, 0, 2050, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_W<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X<kill>, 0, 0, 0, -1, %KC0_128_W, 0, 0, 0, 2051, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_X<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_X, 0, 0, 2052, %T0_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Y<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Y, 0, 0, 2053, %T0_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Z<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Z, 0, 0, 2054, %T0_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_W<def> = MULADD_IEEE_r600 0, 0, %T1_Y<kill>, 0, 0, -1, %KC0_129_W, 0, 0, 2055, %T0_W<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_X<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_X, 0, 0, 2056, %T0_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_Y<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Y, 0, 0, 2057, %T0_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_Z<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Z, 0, 0, 2058, %T0_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_W<def> = MULADD_IEEE_r600 0, 0, %T1_Z<kill>, 0, 0, -1, %KC0_130_W, 0, 0, 2059, %T0_W<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_X<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_X, 0, 0, 2060, %T0_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-def>
	%T1_Y<def> = MOV 1, 0, 0, 0, %KC0_134_W, 0, 0, 0, 2075, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_X<def> = ADD 0, 0, 1, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %KC0_134_X, 0, 0, 0, 2072, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_Y<def> = ADD 0, 0, 1, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %KC0_134_Y, 0, 0, 0, 2073, 1, pred:%PRED_SEL_OFF, 0, 0
	%T4_X<def> = ADD 0, 0, 1, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %KC0_134_Z, 0, 0, 0, 2074, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_X<def> = MOV 1, 0, 0, 0, %KC0_133_X, 0, 0, 0, 2068, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Y<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Y, 0, 0, 2061, %T3_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T5_X<def> = MOV 1, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW<imp-def>
	%T7_Z<def> = MOV 1, 0, 0, 0, %KC0_135_Z, 0, 0, 0, 2078, 1, pred:%PRED_SEL_OFF, 0, 0, %T7_XYZW<imp-def>
	%T5_Y<def> = MOV 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW<imp-use,kill>, %T5_XYZW<imp-def>
	%T6_Z<def> = MOV 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-def>
	%T7_Y<def> = MOV 1, 0, 0, 0, %KC0_135_Y, 0, 0, 0, 2077, 1, pred:%PRED_SEL_OFF, 0, 0, %T7_XYZW<imp-use,kill>, %T7_XYZW<imp-def>
	%T5_Z<def> = MOV 1, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW<imp-use,kill>, %T5_XYZW<imp-def>
	%T7_X<def> = MOV 1, 0, 0, 0, %KC0_135_X, 0, 0, 0, 2076, 1, pred:%PRED_SEL_OFF, 0, 0, %T7_XYZW<imp-use,kill>, %T7_XYZW<imp-def>
	%T6_Y<def> = MOV 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-use,kill>, %T6_XYZW<imp-def>
	%T6_X<def> = MOV 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-use,kill>, %T6_XYZW<imp-def>
	%T0_W<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_W, 0, 0, 2063, %T2_W<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T0_Z<def> = MULADD_IEEE_r600 0, 0, %T1_W<kill>, 0, 0, -1, %KC0_131_Z, 0, 0, 2062, %T2_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T6_W<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-use,kill>, %T6_XYZW<imp-def>
	BUNDLE %T1_X<imp-def>, %T1_Y<imp-def>, %T1_Z<imp-def>, %T1_W<imp-def>, %T6_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T6_Y<imp-use>, %T6_Z<imp-use>, %T6_W<imp-use>
	  * %T1_X<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T6_X, 0, 0, 0, -1, %T6_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Y<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Y, 0, 0, 0, -1, %T6_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Z<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Z, 0, 0, 0, -1, %T6_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_W<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_W, 0, 0, 0, -1, %T6_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_X<def> = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %T1_X<kill>, 0, 0, 1, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T6_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-use,kill>, %T6_XYZW<imp-def>
	%T6_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-use,kill>, %T6_XYZW<imp-def>
	%T6_X<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, %T1_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-use,kill>, %T6_XYZW<imp-def>
	%T7_W<def> = MOV 1, 0, 0, 0, %T6_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T7_XYZW<imp-def>
	BUNDLE %T1_X<imp-def>, %T1_Y<imp-def>, %T1_Z<imp-def>, %T1_W<imp-def>, %T6_X<imp-use>, %T7_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T6_Y<imp-use>, %T7_Y<imp-use>, %T6_Z<imp-use>, %T7_Z<imp-use>, %T6_W<imp-use>, %T7_W<imp-use>
	  * %T1_X<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_X, 0, 0, 0, -1, %T7_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Y<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Y, 0, 0, 0, -1, %T7_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Z<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Z, 0, 0, 0, -1, %T7_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_W<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T6_W, 0, 0, 0, -1, %T7_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T7_X<def> = CNDGE_r600 0, 0, %T1_W, 0, 0, -1, %T1_W, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_W<def> = MOV 1, 0, 0, 0, %T6_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW<imp-def>
	BUNDLE %T2_X<imp-def>, %T2_Y<imp-def>, %T2_Z<imp-def>, %T2_W<imp-def>, %T6_X<imp-use>, %T5_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T6_Y<imp-use>, %T5_Y<imp-use>, %T6_Z<imp-use>, %T5_Z<imp-use>, %T6_W<imp-use>, %T5_W<imp-use>
	  * %T2_X<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_X, 0, 0, 0, -1, %T5_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T2_Y<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Y, 0, 0, 0, -1, %T5_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T2_Z<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T6_Z, 0, 0, 0, -1, %T5_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T2_W<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_W, 0, 0, 0, -1, %T5_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_X<def> = MULADD_IEEE_r600 0, 0, %T7_X, 0, 0, -1, %KC0_139_Y, 0, 0, 2093, %T2_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_Y<def> = MULADD_IEEE_r600 0, 0, %T7_X, 0, 0, -1, %KC0_139_Z, 0, 0, 2094, %T4_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_Z<def> = MULADD_IEEE_r600 0, 0, %T7_X<kill>, 0, 0, -1, %KC0_139_X, 0, 0, 2092, %T3_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_W<def> = CNDGE_r600 0, 0, %T2_Z<kill>, 0, 0, -1, %T2_Z, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_X<def> = LOG_IEEE_r600 1, 0, 0, 0, %T2_W<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_X<def> = MUL 0, 0, 1, 0, 0, 0, %T2_X<kill>, 0, 0, 0, -1, %T3_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_X<def> = EXP_IEEE_r600 1, 0, 0, 0, %T2_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_X<def> = CNDGE_r600 0, 0, %T1_W<kill>, 0, 0, -1, %T2_X<kill>, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_X<def> = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_140_X, 0, 0, 2096, %T1_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW<imp-def>
	%T3_Z<def> = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_140_Z, 0, 0, 2098, %T2_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW<imp-use,kill>, %T3_XYZW<imp-def>
	%T3_Y<def> = MULADD_IEEE_r600 0, 1, %T2_X<kill>, 0, 0, -1, %KC0_140_Y, 0, 0, 2097, %T1_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW<imp-use,kill>, %T3_XYZW<imp-def>
	%T3_W<def> = MOV 1, 0, 0, 1, %T1_Y<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW<imp-use,kill>, %T3_XYZW<imp-def>
	R600_ExportSwz %T0_XYZW<kill>, 1, 60, 0, 1, 2, 3, 40, 0
	R600_ExportSwz %T3_XYZW<kill>, 2, 0, 0, 1, 2, 3, 40, 1
	CF_END_R600
	PAD

# End machine code for function main.


===== SHADER #24 =========================================== VS/RV670/R600 =====
===== 136 dw ===== 8 gprs ===== 1 stack ========================================
0000  00000000 89800000 CALL_FS @0
0002  80000006 a0f40000 ALU 62 @12 KC0[CB0:0-31]
 0012  00100001 00000210     1      x: MUL_IEEE           R0.x,  R1.x, KC0[0].x
 0014  00900001 20000210            y: MUL_IEEE           R0.y,  R1.x, KC0[0].y
 0016  01100001 40000210            z: MUL_IEEE           R0.z,  R1.x, KC0[0].z
 0018  81900001 60000210            w: MUL_IEEE           R0.w,  R1.x, KC0[0].w
 0020  00102401 000280fe     2      x: MULADD_IEEE        R0.x,  R1.y, KC0[1].x, PV.x
 0022  00902401 200284fe            y: MULADD_IEEE        R0.y,  R1.y, KC0[1].y, PV.y
 0024  01102401 400288fe            z: MULADD_IEEE        R0.z,  R1.y, KC0[1].z, PV.z
 0026  81902401 60028cfe            w: MULADD_IEEE        R0.w,  R1.y, KC0[1].w, PV.w
 0028  00104801 000280fe     3      x: MULADD_IEEE        R0.x,  R1.z, KC0[2].x, PV.x
 0030  00904801 206284fe            y: MULADD_IEEE        R3.y,  R1.z, KC0[2].y, PV.y
 0032  01104801 404288fe            z: MULADD_IEEE        R2.z,  R1.z, KC0[2].z, PV.z
 0034  81904801 60428cfe            w: MULADD_IEEE        R2.w,  R1.z, KC0[2].w, PV.w
 0036  00106c01 000280fe     4      x: MULADD_IEEE        R0.x,  R1.w, KC0[3].x, PV.x
 0038  00000c86 20201910            y: MOV                R1.y,  KC0[6].w
 0040  8010c08a 00600010            t: ADD                R3.x,  KC0[10].x, KC0[6].x
 0042  0110c88a 00800010     5      x: ADD                R4.x,  KC0[10].z, KC0[6].z
 0044  8090c48a 20400010            y: ADD                R2.y,  KC0[10].y, KC0[6].y
 0046  00000085 00401910     6      x: MOV                R2.x,  KC0[5].x
 0048  00906c01 20028403            y: MULADD_IEEE        R0.y,  R1.w, KC0[3].y, R3.y
 0050  00000887 40e01910            z: MOV                R7.z,  KC0[7].z
 0052  80000089 00a01910            t: MOV                R5.x,  KC0[9].x
 0054  00000489 20a01910     7      y: MOV                R5.y,  KC0[9].y
 0056  00000884 40c01910            z: MOV                R6.z,  KC0[4].z
 0058  80000487 20e01910            t: MOV                R7.y,  KC0[7].y
 0060  00000087 00e01910     8      x: MOV                R7.x,  KC0[7].x
 0062  00000484 20c01910            y: MOV                R6.y,  KC0[4].y
 0064  00000889 40a01910            z: MOV                R5.z,  KC0[9].z
 0066  80000084 00c01910            t: MOV                R6.x,  KC0[4].x
 0068  01106c01 40028802     9      z: MULADD_IEEE        R0.z,  R1.w, KC0[3].z, R2.z
 0070  01906c01 60028c02            w: MULADD_IEEE        R0.w,  R1.w, KC0[3].w, R2.w
 0072  800000f8 60c01910            t: MOV                R6.w,  0
 0074  0000c006 00205010    10      x: DOT4               R1.x,  R6.x, R6.x
 0076  0080c406 20205000            y: DOT4               __.y,  R6.y, R6.y
 0078  0100c806 40205000            z: DOT4               __.z,  R6.z, R6.z
 0080  801fe0ff 60205000            w: DOT4               __.w,  PS, PS
 0082  800000fe 00206711    11      t: RECIPSQRT_CLAMPED  R1.x,  |PV.x|
 0084  001fe084 00c00210    12      x: MUL_IEEE           R6.x,  KC0[4].x, PS
 0086  001fe484 20c00210            y: MUL_IEEE           R6.y,  KC0[4].y, PS
 0088  001fe884 40c00210            z: MUL_IEEE           R6.z,  KC0[4].z, PS
 0090  80000c06 60e01910            w: MOV                R7.w,  R6.w
 0092  0000e0fe 00205000    13      x: DOT4               __.x,  PV.x, R7.x
 0094  0080e4fe 20205000            y: DOT4               __.y,  PV.y, R7.y
 0096  0100e8fe 40205000            z: DOT4               __.z,  PV.z, R7.z
 0098  819fcc06 60205010            w: DOT4               R1.w,  R6.w, PV.w
 0100  001fc0fe 00e340f8    14      x: CNDGE              R7.x,  PV.x, PV.x, 0
 0102  80000c06 60a01910            w: MOV                R5.w,  R6.w
 0104  0000a006 00405000    15      x: DOT4               __.x,  R6.x, R5.x
 0106  0080a406 20405000            y: DOT4               __.y,  R6.y, R5.y
 0108  0100a806 40405010            z: DOT4               R2.z,  R6.z, R5.z
 0110  019fcc06 60405000            w: DOT4               __.w,  R6.w, PV.w
 0112  809160fe 00268402            t: MULADD_IEEE        R1.x,  PV.x, KC0[11].y, R2.y  SCL_122
 0114  01116007 20468004    16      y: MULADD_IEEE        R2.y,  R7.x, KC0[11].z, R4.x  VEC_021
 0116  00116007 40228003            z: MULADD_IEEE        R1.z,  R7.x, KC0[11].x, R3.x
 0118  801fc0fe 604340f8            w: CNDGE              R2.w,  PV.x, PV.x, 0
 0120  80000cfe 00606310    17      t: LOG_IEEE           R3.x,  PV.w
 0122  801fe002 00400110    18      x: MUL                R2.x,  R2.x, PS
 0124  800000fe 00406110    19      t: EXP_IEEE           R2.x,  PV.x
 0126  801fec01 004340f8    20      x: CNDGE              R2.x,  R1.w, PS, 0
 0128  001180fe 80628801    21      x: MULADD_IEEE_sat    R3.x,  PV.x, KC0[12].x, R1.z
 0130  009180fe a0628001            y: MULADD_IEEE_sat    R3.y,  PV.x, KC0[12].y, R1.x
 0132  011180fe c0628402            z: MULADD_IEEE_sat    R3.z,  PV.x, KC0[12].z, R2.y
 0134  80000401 e0601910            w: MOV_sat            R3.w,  R1.y
0004  c000203c 94400688 EXPORT_DONE        POS   60   R0.xyzw  VPM
0006  c001c000 94600688 EXPORT_DONE        PARAM 0    R3.xyzw  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_FLOAT, }

===== SHADER #25 ======================================== FETCH/RV670/R600 =====
===== 8 dw ===== 2 gprs ===== 0 stack ==========================================
0000  00000002 81000000 VTX 1 @4
 0004  7c00a000 8c151001 00080000 VFETCH              R1.xyz1, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:48 NUM:0 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

--------------------------------------------------------------
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], COLOR, COLOR
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
; ModuleID = 'tgsi'

; Fragment shader entry point: the IR form of the TGSI program dumped just
; above it (`MOV OUT[0], IN[0]`). Each of the four input channels is read and
; immediately placed in its lane of the output vector, which is then handed to
; the store.swizzle intrinsic with both trailing operands set to 0.
define void @main() #0 {
main_body:
  ; Lane 0
  %in.x = call float @llvm.R600.load.input(i32 0)
  %out.x = insertelement <4 x float> undef, float %in.x, i32 0
  ; Lane 1
  %in.y = call float @llvm.R600.load.input(i32 1)
  %out.xy = insertelement <4 x float> %out.x, float %in.y, i32 1
  ; Lane 2
  %in.z = call float @llvm.R600.load.input(i32 2)
  %out.xyz = insertelement <4 x float> %out.xy, float %in.z, i32 2
  ; Lane 3
  %in.w = call float @llvm.R600.load.input(i32 3)
  %out.xyzw = insertelement <4 x float> %out.xyz, float %in.w, i32 3
  ; Emit the assembled vector unchanged.
  call void @llvm.R600.store.swizzle(<4 x float> %out.xyzw, i32 0, i32 0)
  ret void
}

; Intrinsics used by @main above: one scalar read per input channel and a
; single vector store.

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

; Takes the <4 x float> to emit plus two i32 operands (both 0 in this module).
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

; Attribute groups: #0 is attached to @main, #1 to the input-load intrinsic.
attributes #0 = { "ShaderType"="0" }
attributes #1 = { readnone }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T0_W in %vreg0, %T0_Z in %vreg1, %T0_Y in %vreg2, %T0_X in %vreg3

BB#0: derived from LLVM BB %main_body
    Live Ins: %T0_W %T0_Z %T0_Y %T0_X
	%T0_X<def> = KILL %T0_X, %T0_XYZW<imp-def>
	%T0_Y<def> = KILL %T0_Y, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T0_Z<def> = KILL %T0_Z, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T0_W<def> = KILL %T0_W, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	R600_ExportSwz %T0_XYZW<kill>, 0, 0, 0, 1, 2, 3, 40, 1
	CF_END_R600

# End machine code for function main.


===== SHADER #26 =========================================== PS/RV670/R600 =====
===== 4 dw ===== 1 gprs ===== 1 stack ==========================================
0000  c0000000 94600688 EXPORT_DONE        PIXEL 0    R0.xyzw  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
DCL CONST[0..11]
DCL TEMP[0..3]
  0: MUL TEMP[0], IN[0].xxxx, CONST[0]
  1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
  2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
  3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
  4: DP3 TEMP[1].x, IN[1], IN[1]
  5: RSQ TEMP[1].x, TEMP[1]
  6: MUL TEMP[0], IN[1], TEMP[1].xxxx
  7: MOV TEMP[2].w, CONST[4].xxxx
  8: MOV TEMP[3], CONST[5]
  9: MOV_SAT OUT[1], TEMP[3]
 10: DP3 TEMP[2].x, TEMP[0], CONST[6]
 11: DP3 TEMP[2].y, TEMP[0], CONST[8]
 12: LIT TEMP[1], TEMP[2]
 13: ADD TEMP[3], CONST[9], TEMP[3]
 14: MAD TEMP[3], TEMP[1].yyyy, CONST[10], TEMP[3]
 15: MAD_SAT OUT[1].xyz, TEMP[1].zzzz, CONST[11], TEMP[3]
 16: END
; ModuleID = 'tgsi'

; Vertex shader entry point (attribute group #0 sets "ShaderType"="1").
; This is the scalarised IR form of the 17-instruction TGSI program dumped just
; before this module; "TGSI n" in the comments below refers to that listing.
; Constants are read from address space 8, viewed as [1024 x <4 x float>];
; element n appears to correspond to CONST[n] in the TGSI listing.
define void @main() #0 {
main_body:
  ; Input channels 4..7: the x/y/z/w operands multiplied against constants 0..3
  ; below (IN[0] in the TGSI listing).
  %0 = call float @llvm.R600.load.input(i32 4)
  %1 = call float @llvm.R600.load.input(i32 5)
  %2 = call float @llvm.R600.load.input(i32 6)
  %3 = call float @llvm.R600.load.input(i32 7)
  ; Input channels 8..11 (IN[1] in the TGSI listing). %7 is never referenced
  ; again in this function.
  %4 = call float @llvm.R600.load.input(i32 8)
  %5 = call float @llvm.R600.load.input(i32 9)
  %6 = call float @llvm.R600.load.input(i32 10)
  %7 = call float @llvm.R600.load.input(i32 11)
  ; TGSI 0: per-channel %0 * constant element 0 (the bare `null` pointer is
  ; element 0 of the constant array).
  %8 = load <4 x float> addrspace(8)* null
  %9 = extractelement <4 x float> %8, i32 0
  %10 = fmul float %0, %9
  %11 = load <4 x float> addrspace(8)* null
  %12 = extractelement <4 x float> %11, i32 1
  %13 = fmul float %0, %12
  %14 = load <4 x float> addrspace(8)* null
  %15 = extractelement <4 x float> %14, i32 2
  %16 = fmul float %0, %15
  %17 = load <4 x float> addrspace(8)* null
  %18 = extractelement <4 x float> %17, i32 3
  %19 = fmul float %0, %18
  ; TGSI 1: accumulate %1 * constant element 1 onto the products above
  ; (each MAD is expressed as a separate fmul + fadd).
  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %21 = extractelement <4 x float> %20, i32 0
  %22 = fmul float %1, %21
  %23 = fadd float %22, %10
  %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %25 = extractelement <4 x float> %24, i32 1
  %26 = fmul float %1, %25
  %27 = fadd float %26, %13
  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %29 = extractelement <4 x float> %28, i32 2
  %30 = fmul float %1, %29
  %31 = fadd float %30, %16
  %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %33 = extractelement <4 x float> %32, i32 3
  %34 = fmul float %1, %33
  %35 = fadd float %34, %19
  ; TGSI 2: accumulate %2 * constant element 2.
  %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %37 = extractelement <4 x float> %36, i32 0
  %38 = fmul float %2, %37
  %39 = fadd float %38, %23
  %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %41 = extractelement <4 x float> %40, i32 1
  %42 = fmul float %2, %41
  %43 = fadd float %42, %27
  %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %45 = extractelement <4 x float> %44, i32 2
  %46 = fmul float %2, %45
  %47 = fadd float %46, %31
  %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %49 = extractelement <4 x float> %48, i32 3
  %50 = fmul float %2, %49
  %51 = fadd float %50, %35
  ; TGSI 3: accumulate %3 * constant element 3. The four sums %55/%59/%63/%67
  ; form the first exported vector at the end of the function.
  %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %53 = extractelement <4 x float> %52, i32 0
  %54 = fmul float %3, %53
  %55 = fadd float %54, %39
  %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %57 = extractelement <4 x float> %56, i32 1
  %58 = fmul float %3, %57
  %59 = fadd float %58, %43
  %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %61 = extractelement <4 x float> %60, i32 2
  %62 = fmul float %3, %61
  %63 = fadd float %62, %47
  %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %65 = extractelement <4 x float> %64, i32 3
  %66 = fmul float %3, %65
  %67 = fadd float %66, %51
  ; TGSI 4: three-component dot product of (%4, %5, %6) with itself, expressed
  ; as a dp4 whose lane 3 is forced to 0.0 in both operands.
  %68 = insertelement <4 x float> undef, float %4, i32 0
  %69 = insertelement <4 x float> %68, float %5, i32 1
  %70 = insertelement <4 x float> %69, float %6, i32 2
  %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 3
  %72 = insertelement <4 x float> undef, float %4, i32 0
  %73 = insertelement <4 x float> %72, float %5, i32 1
  %74 = insertelement <4 x float> %73, float %6, i32 2
  %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 3
  %76 = call float @llvm.AMDGPU.dp4(<4 x float> %71, <4 x float> %75)
  ; TGSI 5: rsq is applied to the absolute value of the dot product.
  %77 = call float @fabs(float %76)
  %78 = call float @llvm.AMDGPU.rsq(float %77)
  ; TGSI 6: scale (%4, %5, %6) by that factor; only three channels are computed.
  %79 = fmul float %4, %78
  %80 = fmul float %5, %78
  %81 = fmul float %6, %78
  ; TGSI 7: constant element 4, channel 0; used below as the pow() exponent.
  %82 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %83 = extractelement <4 x float> %82, i32 0
  ; TGSI 8: all four channels of constant element 5.
  %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %85 = extractelement <4 x float> %84, i32 0
  %86 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %87 = extractelement <4 x float> %86, i32 1
  %88 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %89 = extractelement <4 x float> %88, i32 2
  %90 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %91 = extractelement <4 x float> %90, i32 3
  ; TGSI 9: clamp each channel to [0, 1]. Only %95 (channel 3) is referenced
  ; later, as lane 3 of the second exported vector; %92..%94 are not used again.
  %92 = call float @llvm.AMDIL.clamp.(float %85, float 0.000000e+00, float 1.000000e+00)
  %93 = call float @llvm.AMDIL.clamp.(float %87, float 0.000000e+00, float 1.000000e+00)
  %94 = call float @llvm.AMDIL.clamp.(float %89, float 0.000000e+00, float 1.000000e+00)
  %95 = call float @llvm.AMDIL.clamp.(float %91, float 0.000000e+00, float 1.000000e+00)
  ; TGSI 10: three-component dot product of the scaled vector (%79, %80, %81)
  ; with channels 0..2 of constant element 6 (lane 3 zeroed on both sides).
  %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
  %97 = extractelement <4 x float> %96, i32 0
  %98 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
  %99 = extractelement <4 x float> %98, i32 1
  %100 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
  %101 = extractelement <4 x float> %100, i32 2
  %102 = insertelement <4 x float> undef, float %79, i32 0
  %103 = insertelement <4 x float> %102, float %80, i32 1
  %104 = insertelement <4 x float> %103, float %81, i32 2
  %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3
  %106 = insertelement <4 x float> undef, float %97, i32 0
  %107 = insertelement <4 x float> %106, float %99, i32 1
  %108 = insertelement <4 x float> %107, float %101, i32 2
  %109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3
  %110 = call float @llvm.AMDGPU.dp4(<4 x float> %105, <4 x float> %109)
  ; TGSI 11: the same dot product against channels 0..2 of constant element 8.
  %111 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %112 = extractelement <4 x float> %111, i32 0
  %113 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %114 = extractelement <4 x float> %113, i32 1
  %115 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
  %116 = extractelement <4 x float> %115, i32 2
  %117 = insertelement <4 x float> undef, float %79, i32 0
  %118 = insertelement <4 x float> %117, float %80, i32 1
  %119 = insertelement <4 x float> %118, float %81, i32 2
  %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 3
  %121 = insertelement <4 x float> undef, float %112, i32 0
  %122 = insertelement <4 x float> %121, float %114, i32 1
  %123 = insertelement <4 x float> %122, float %116, i32 2
  %124 = insertelement <4 x float> %123, float 0.000000e+00, i32 3
  %125 = call float @llvm.AMDGPU.dp4(<4 x float> %120, <4 x float> %124)
  ; TGSI 12 (LIT), expanded into compares and selects:
  ;   %127 = %110 when the `uge 0.0` compare holds, otherwise 0.0
  ;   %129 = %125 when the `uge 0.0` compare holds, otherwise 0.0
  ;   %130 = pow(%129, %83)
  ;   %132 = 0.0 when the `ult 0.0` compare on %110 holds, otherwise %130
  ; Both predicates are the unordered forms, so they also hold for NaN inputs.
  %126 = fcmp uge float %110, 0.000000e+00
  %127 = select i1 %126, float %110, float 0.000000e+00
  %128 = fcmp uge float %125, 0.000000e+00
  %129 = select i1 %128, float %125, float 0.000000e+00
  %130 = call float @llvm.pow.f32(float %129, float %83)
  %131 = fcmp ult float %110, 0.000000e+00
  %132 = select i1 %131, float 0.000000e+00, float %130
  ; TGSI 13: constant element 9 plus the unclamped constant-element-5 values
  ; (%85, %87, %89); channels 0..2 only.
  %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %134 = extractelement <4 x float> %133, i32 0
  %135 = fadd float %134, %85
  %136 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %137 = extractelement <4 x float> %136, i32 1
  %138 = fadd float %137, %87
  %139 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
  %140 = extractelement <4 x float> %139, i32 2
  %141 = fadd float %140, %89
  ; TGSI 14: add %127 * constant element 10 to the running sum.
  %142 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %143 = extractelement <4 x float> %142, i32 0
  %144 = fmul float %127, %143
  %145 = fadd float %144, %135
  %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %147 = extractelement <4 x float> %146, i32 1
  %148 = fmul float %127, %147
  %149 = fadd float %148, %138
  %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
  %151 = extractelement <4 x float> %150, i32 2
  %152 = fmul float %127, %151
  %153 = fadd float %152, %141
  ; TGSI 15: add %132 * constant element 11, then clamp each channel to [0, 1].
  %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
  %155 = extractelement <4 x float> %154, i32 0
  %156 = fmul float %132, %155
  %157 = fadd float %156, %145
  %158 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
  %159 = extractelement <4 x float> %158, i32 1
  %160 = fmul float %132, %159
  %161 = fadd float %160, %149
  %162 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
  %163 = extractelement <4 x float> %162, i32 2
  %164 = fmul float %132, %163
  %165 = fadd float %164, %153
  %166 = call float @llvm.AMDIL.clamp.(float %157, float 0.000000e+00, float 1.000000e+00)
  %167 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00)
  %168 = call float @llvm.AMDIL.clamp.(float %165, float 0.000000e+00, float 1.000000e+00)
  ; First export: the transformed vector from TGSI 3 (OUT[0], POSITION).
  ; NOTE(review): in the preceding shader's dump a store.swizzle with the same
  ; (60, 1) operands is followed by `EXPORT_DONE POS 60`; presumably the same
  ; applies here -- confirm against this shader's disassembly.
  %169 = insertelement <4 x float> undef, float %55, i32 0
  %170 = insertelement <4 x float> %169, float %59, i32 1
  %171 = insertelement <4 x float> %170, float %63, i32 2
  %172 = insertelement <4 x float> %171, float %67, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %172, i32 60, i32 1)
  ; Second export: the clamped sums in lanes 0..2 and the clamped channel 3 of
  ; constant element 5 (%95) in lane 3 (OUT[1], COLOR).
  ; NOTE(review): the (0, 2) operands match the preceding shader's
  ; `EXPORT_DONE PARAM 0`; presumably the same here -- confirm.
  %173 = insertelement <4 x float> undef, float %166, i32 0
  %174 = insertelement <4 x float> %173, float %167, i32 1
  %175 = insertelement <4 x float> %174, float %168, i32 2
  %176 = insertelement <4 x float> %175, float %95, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %176, i32 0, i32 2)
  ret void
}

; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1

; Function Attrs: readnone
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1

; Function Attrs: readonly
declare float @fabs(float) #2

; Function Attrs: readnone
declare float @llvm.AMDGPU.rsq(float) #1

; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #1

; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3

declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)

attributes #0 = { "ShaderType"="1" }
attributes #1 = { readnone }
attributes #2 = { readonly }
attributes #3 = { nounwind readonly }
# Machine code for function main: Post SSA, not tracking liveness
Function Live Ins: %T2_Z in %vreg0, %T2_Y in %vreg1, %T2_X in %vreg2, %T1_W in %vreg3, %T1_Z in %vreg4, %T1_Y in %vreg5, %T1_X in %vreg6

BB#0: derived from LLVM BB %main_body
    Live Ins: %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X
	CF_CALL_FS_R600
	%T2_X<def> = KILL %T2_X, %T2_XYZW<imp-def>
	%T2_Y<def> = KILL %T2_Y, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T2_Z<def> = KILL %T2_Z, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	CF_ALU 0, 0, 0, 2, 0, 0, 0, 60
	%T0_X<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_X, 0, 0, 0, 2048, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Y, 0, 0, 0, 2049, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Z, 0, 0, 0, 2050, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_W<def> = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	BUNDLE %T0_X<imp-def>, %T0_Y<imp-def>, %T0_Z<imp-def>, %T0_W<imp-def>, %T2_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T2_Y<imp-use>, %T2_Z<imp-use>, %T2_W<imp-use>
	  * %T0_X<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T0_Y<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T2_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T0_Z<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T2_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T0_W<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_X<def> = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %T0_W<kill>, 0, 0, 1, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_Y<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_X, 0, 0, 2052, %T0_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_Z<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Y, 0, 0, 2053, %T0_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_W<def> = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Z, 0, 0, 2054, %T0_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_X<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_X, 0, 0, 2056, %T3_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_Y<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Y, 0, 0, 2057, %T3_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T3_W<def> = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Z, 0, 0, 2058, %T0_W<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T0_X<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_X, 0, 0, 2060, %T0_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-def>
	%T2_Y<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T3_Z<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X<kill>, 0, 0, 0, -1, %KC0_128_W, 0, 0, 0, 2051, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_X<def> = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T3_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW<imp-use,kill>, %T2_XYZW<imp-def>
	%T0_Y<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Y, 0, 0, 2061, %T3_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T3_Z<def> = MULADD_IEEE_r600 0, 0, %T1_Y<kill>, 0, 0, -1, %KC0_129_W, 0, 0, 2055, %T3_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T4_X<def> = MULADD_IEEE_r600 0, 0, %T1_Z<kill>, 0, 0, -1, %KC0_130_W, 0, 0, 2059, %T3_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_Y<def> = MOV 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_X<def> = ADD 0, 0, 1, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %KC0_133_X, 0, 0, 0, 2068, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_Y<def> = ADD 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %KC0_133_Y, 0, 0, 0, 2069, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_X<def> = ADD 0, 0, 1, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %KC0_133_Z, 0, 0, 0, 2070, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_Y<def> = MOV 1, 0, 0, 0, %KC0_133_W, 0, 0, 0, 2071, 1, pred:%PRED_SEL_OFF, 0, 0
	%T6_Z<def> = MOV 1, 0, 0, 0, %KC0_136_Z, 0, 0, 0, 2082, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-def>
	%T0_W<def> = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_W, 0, 0, 2063, %T4_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T6_Y<def> = MOV 1, 0, 0, 0, %KC0_136_Y, 0, 0, 0, 2081, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-use,kill>, %T6_XYZW<imp-def>
	%T0_Z<def> = MULADD_IEEE_r600 0, 0, %T1_W<kill>, 0, 0, -1, %KC0_131_Z, 0, 0, 2062, %T3_W<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW<imp-use,kill>, %T0_XYZW<imp-def>
	%T6_X<def> = MOV 1, 0, 0, 0, %KC0_136_X, 0, 0, 0, 2080, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-use,kill>, %T6_XYZW<imp-def>
	%T4_Z<def> = MOV 1, 0, 0, 0, %KC0_134_Z, 0, 0, 0, 2074, 1, pred:%PRED_SEL_OFF, 0, 0, %T4_XYZW<imp-def>
	%T4_Y<def> = MOV 1, 0, 0, 0, %KC0_134_Y, 0, 0, 0, 2073, 1, pred:%PRED_SEL_OFF, 0, 0, %T4_XYZW<imp-use,kill>, %T4_XYZW<imp-def>
	%T6_W<def> = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW<imp-def>
	BUNDLE %T1_X<imp-def>, %T1_Y<imp-def>, %T1_Z<imp-def>, %T1_W<imp-def>, %T2_X<imp-use>, %T6_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T2_Y<imp-use>, %T6_Y<imp-use>, %T2_Z<imp-use>, %T6_Z<imp-use>, %T2_W<imp-use>, %T6_W<imp-use>
	  * %T1_X<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T6_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Y<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T6_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Z<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T6_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_W<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T6_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_X<def> = CNDGE_r600 0, 0, %T1_Z<kill>, 0, 0, -1, %T1_Z, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_X<def> = LOG_IEEE_r600 1, 0, 0, 0, %T5_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_X<def> = MUL 0, 0, 1, 0, 0, 0, %T5_Y<kill>, 0, 0, 0, -1, %T5_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T5_X<def> = EXP_IEEE_r600 1, 0, 0, 0, %T5_X<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T4_X<def> = MOV 1, 0, 0, 0, %KC0_134_X, 0, 0, 0, 2072, 1, pred:%PRED_SEL_OFF, 0, 0, %T4_XYZW<imp-use,kill>, %T4_XYZW<imp-def>
	%T4_W<def> = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T4_XYZW<imp-def>
	BUNDLE %T1_X<imp-def>, %T1_Y<imp-def>, %T1_Z<imp-def>, %T1_W<imp-def>, %T2_X<imp-use>, %T4_X<imp-use>, %PRED_SEL_OFF<imp-use>, %T2_Y<imp-use>, %T4_Y<imp-use>, %T2_Z<imp-use>, %T4_Z<imp-use>, %T2_W<imp-use>, %T4_W<imp-use>
	  * %T1_X<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T4_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Y<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T4_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_Z<def> = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T4_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0
	  * %T1_W<def> = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T4_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_X<def> = CNDGE_r600 0, 0, %T1_Z, 0, 0, -1, %T5_X<kill>, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T4_Y<def> = CNDGE_r600 0, 0, %T1_Z<kill>, 0, 0, -1, %T1_Z, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T4_X<def> = MULADD_IEEE_r600 0, 0, %T4_Y, 0, 0, -1, %KC0_138_Y, 0, 0, 2089, %T3_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T2_Y<def> = MULADD_IEEE_r600 0, 0, %T4_Y, 0, 0, -1, %KC0_138_Z, 0, 0, 2090, %T3_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T1_Z<def> = MULADD_IEEE_r600 0, 0, %T4_Y<kill>, 0, 0, -1, %KC0_138_X, 0, 0, 2088, %T1_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0
	%T3_X<def> = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_139_X, 0, 0, 2092, %T1_Z<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW<imp-def>
	%T3_Z<def> = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_139_Z, 0, 0, 2094, %T2_Y<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW<imp-use,kill>, %T3_XYZW<imp-def>
	%T3_Y<def> = MULADD_IEEE_r600 0, 1, %T2_X<kill>, 0, 0, -1, %KC0_139_Y, 0, 0, 2093, %T4_X<kill>, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW<imp-use,kill>, %T3_XYZW<imp-def>
	%T3_W<def> = MOV 1, 0, 0, 1, %T1_Y<kill>, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW<imp-use,kill>, %T3_XYZW<imp-def>
	R600_ExportSwz %T0_XYZW<kill>, 1, 60, 0, 1, 2, 3, 40, 0
	R600_ExportSwz %T3_XYZW<kill>, 2, 0, 0, 1, 2, 3, 40, 1
	CF_END_R600
	PAD

# End machine code for function main.


===== SHADER #27 =========================================== VS/RV670/R600 =====
===== 130 dw ===== 7 gprs ===== 1 stack ========================================
0000  00000000 89800000 CALL_FS @0
0002  80000006 a0e80000 ALU 59 @12 KC0[CB0:0-31]
 0012  00100001 00000210     1      x: MUL_IEEE           R0.x,  R1.x, KC0[0].x
 0014  00900001 20000210            y: MUL_IEEE           R0.y,  R1.x, KC0[0].y
 0016  01100001 40000210            z: MUL_IEEE           R0.z,  R1.x, KC0[0].z
 0018  800000f8 60401910            w: MOV                R2.w,  0
 0020  00004002 00005000     2      x: DOT4               __.x,  R2.x, R2.x
 0022  00804402 20005000            y: DOT4               __.y,  R2.y, R2.y
 0024  01004802 40005000            z: DOT4               __.z,  R2.z, R2.z
 0026  819fccfe 60005010            w: DOT4               R0.w,  PV.w, PV.w
 0028  00102401 20628000     3      y: MULADD_IEEE        R3.y,  R1.y, KC0[1].x, R0.x
 0030  00902401 40628400            z: MULADD_IEEE        R3.z,  R1.y, KC0[1].y, R0.y
 0032  01102401 60028800            w: MULADD_IEEE        R0.w,  R1.y, KC0[1].z, R0.z
 0034  800000fe 00606711            t: RECIPSQRT_CLAMPED  R3.x,  |PV.x|
 0036  00104801 000284fe     4      x: MULADD_IEEE        R0.x,  R1.z, KC0[2].x, PV.y
 0038  00904801 206288fe            y: MULADD_IEEE        R3.y,  R1.z, KC0[2].y, PV.z
 0040  001fe802 40480210            z: MUL_IEEE           R2.z,  R2.z, PS               VEC_120
 0042  81104801 60628cfe            w: MULADD_IEEE        R3.w,  R1.z, KC0[2].z, PV.w
 0044  00106c01 000280fe     5      x: MULADD_IEEE        R0.x,  R1.w, KC0[3].x, PV.x
 0046  00006402 20400210            y: MUL_IEEE           R2.y,  R2.y, R3.x
 0048  01900001 40600210            z: MUL_IEEE           R3.z,  R1.x, KC0[0].w
 0050  80006002 00400210            t: MUL_IEEE           R2.x,  R2.x, R3.x
 0052  00906c01 20028403     6      y: MULADD_IEEE        R0.y,  R1.w, KC0[3].y, R3.y
 0054  81902401 406288fe            z: MULADD_IEEE        R3.z,  R1.y, KC0[1].w, PV.z
 0056  01904801 008288fe     7      x: MULADD_IEEE        R4.x,  R1.z, KC0[2].w, PV.z
 0058  00000084 20a01910            y: MOV                R5.y,  KC0[4].x
 0060  8010a089 00200010            t: ADD                R1.x,  KC0[9].x, KC0[5].x
 0062  0110a889 00600010     8      x: ADD                R3.x,  KC0[9].z, KC0[5].z
 0064  8090a489 20600010            y: ADD                R3.y,  KC0[9].y, KC0[5].y
 0066  00000c85 20201910     9      y: MOV                R1.y,  KC0[5].w
 0068  00000888 40c01910            z: MOV                R6.z,  KC0[8].z
 0070  01906c01 60028004            w: MULADD_IEEE        R0.w,  R1.w, KC0[3].w, R4.x
 0072  80000488 20c01910            t: MOV                R6.y,  KC0[8].y
 0074  00000088 00c01910    10      x: MOV                R6.x,  KC0[8].x
 0076  00000486 20801910            y: MOV                R4.y,  KC0[6].y
 0078  01106c01 400e8c03            z: MULADD_IEEE        R0.z,  R1.w, KC0[3].z, R3.w   VEC_102
 0080  00000c02 60c01910            w: MOV                R6.w,  R2.w
 0082  80000886 40801910            t: MOV                R4.z,  KC0[6].z
 0084  001fc002 00205000    11      x: DOT4               __.x,  R2.x, PV.x
 0086  0080c402 20205000            y: DOT4               __.y,  R2.y, R6.y
 0088  0100c802 40205010            z: DOT4               R1.z,  R2.z, R6.z
 0090  819fcc02 60205000            w: DOT4               __.w,  R2.w, PV.w
 0092  801fc0fe 00a340f8    12      x: CNDGE              R5.x,  PV.x, PV.x, 0
 0094  800000fe 00a06310    13      t: LOG_IEEE           R5.x,  PV.x
 0096  801fe405 00a00110    14      x: MUL                R5.x,  R5.y, PS
 0098  00000086 00801910    15      x: MOV                R4.x,  KC0[6].x
 0100  00000c02 60801910            w: MOV                R4.w,  R2.w
 0102  800000fe 00a06110            t: EXP_IEEE           R5.x,  PV.x
 0104  001fc002 00205000    16      x: DOT4               __.x,  R2.x, PV.x
 0106  00808402 20205000            y: DOT4               __.y,  R2.y, R4.y
 0108  01008802 40205010            z: DOT4               R1.z,  R2.z, R4.z
 0110  819fcc02 60205000            w: DOT4               __.w,  R2.w, PV.w
 0112  0000a0fe 004340f8    17      x: CNDGE              R2.x,  PV.x, R5.x, 0
 0114  801fc0fe 208340f8            y: CNDGE              R4.y,  PV.x, PV.x, 0
 0116  009144fe 00828403    18      x: MULADD_IEEE        R4.x,  PV.y, KC0[10].y, R3.y
 0118  011144fe 20468003            y: MULADD_IEEE        R2.y,  PV.y, KC0[10].z, R3.x  VEC_021
 0120  801144fe 40228001            z: MULADD_IEEE        R1.z,  PV.y, KC0[10].x, R1.x
 0122  00116002 806288fe    19      x: MULADD_IEEE_sat    R3.x,  R2.x, KC0[11].x, PV.z
 0124  00916002 a06280fe            y: MULADD_IEEE_sat    R3.y,  R2.x, KC0[11].y, PV.x
 0126  01116002 c06284fe            z: MULADD_IEEE_sat    R3.z,  R2.x, KC0[11].z, PV.y
 0128  80000401 e0601910            w: MOV_sat            R3.w,  R1.y
0004  c000203c 94400688 EXPORT_DONE        POS   60   R0.xyzw  VPM
0006  c001c000 94600688 EXPORT_DONE        PARAM 0    R3.xyzw  VPM  EOP
===== SHADER_END ===============================================================

--------------------------------------------------------------
Vertex elements state:
   {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_FLOAT, }
   {src_offset = 12, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_FLOAT, }

===== SHADER #28 ======================================== FETCH/RV670/R600 =====
===== 12 dw ===== 3 gprs ===== 0 stack =========================================
0000  00000002 81000400 VTX 2 @4
 0004  7c00a000 8c151001 00080000 VFETCH              R1.xyz1, R0.x,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:48 NUM:0 COMP:0 MODE:1)
 0008  7c00a000 8c151002 0008000c VFETCH              R2.xyz1, R0.x + 12b ,   RID:160  VERTEX MFC:31 UCF:0 FMT(DTA:48 NUM:0 COMP:0 MODE:1)
0002  00000000 8a000000 RET @0
===== SHADER_END ===============================================================

290 frames in 5.0 seconds = 57.933 FPS