-------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, } {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, } ===== SHADER #1 ========================================= FETCH/RV670/R600 ===== ===== 12 dw ===== 3 gprs ===== 0 stack ========================================= 0000 00000002 81000400 VTX 2 @4 0004 7c00a000 88cd1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7c00a000 88cd1002 00080010 VFETCH R2.xyzw, R0.x + 16b , RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, } {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_SINT, } ===== SHADER #2 ========================================= FETCH/RV670/R600 ===== ===== 12 dw ===== 3 gprs ===== 0 stack ========================================= 0000 00000002 81000400 VTX 2 @4 0004 7c00a000 88cd1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7c00a000 d88d1002 00080010 VFETCH R2.xyzw, R0.x + 16b , RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:1 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, } {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_UINT, } ===== SHADER #3 ========================================= FETCH/RV670/R600 ===== ===== 12 dw ===== 3 gprs ===== 0 stack ========================================= 0000 00000002 81000400 VTX 2 @4 0004 7c00a000 88cd1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7c00a000 988d1002 00080010 VFETCH R2.xyzw, R0.x + 16b , RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32_UINT, } ===== SHADER #4 ========================================= FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 93564001 00080000 VFETCH R1.x001, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:13 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32_UINT, } ===== SHADER #5 ========================================= FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 97561001 00080000 VFETCH R1.xy01, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:29 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_UINT, } ===== SHADER #6 ========================================= FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 9bd51001 00080000 VFETCH R1.xyz1, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:47 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_UINT, } ===== SHADER #7 ========================================= FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 988d1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = call float @llvm.R600.load.input(i32 8) %5 = call float @llvm.R600.load.input(i32 9) %6 = call float @llvm.R600.load.input(i32 10) %7 = call float @llvm.R600.load.input(i32 11) %8 = insertelement <4 x float> undef, float %0, i32 0 %9 = insertelement <4 x float> %8, float %1, i32 1 %10 = insertelement <4 x float> %9, float %2, i32 2 %11 = insertelement <4 x float> %10, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1) %12 = insertelement <4 x float> undef, float %4, i32 0 %13 = insertelement <4 x float> %12, float %5, i32 1 %14 = insertelement <4 x float> %13, float %6, i32 2 %15 = insertelement <4 x float> %14, float %7, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %15, i32 0, i32 2) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T2_W in %vreg0, %T2_Z in %vreg1, %T2_Y in %vreg2, %T2_X in %vreg3, %T1_W in %vreg4, %T1_Z in %vreg5, %T1_Y in %vreg6, %T1_X in %vreg7 BB#0: derived from LLVM BB %main_body Live Ins: %T2_W %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X CF_CALL_FS_R600 R600_ExportSwz %T1_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T2_XYZW, 2, 0, 0, 1, 2, 3, 40, 1 CF_END_R600 # End machine code for function main. ===== SHADER #8 ============================================ VS/RV670/R600 ===== ===== 8 dw ===== 3 gprs ===== 1 stack ========================================== 0000 00000000 89800000 CALL_FS @0 0002 c000a03c 94400688 EXPORT_DONE POS 60 R1.xyzw VPM 0004 c0014000 94600688 EXPORT_DONE PARAM 0 R2.xyzw VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- VERT DCL IN[0] DCL OUT[0], POSITION 0: MOV OUT[0], IN[0] 1: END STREAMOUT 0: MEM_STREAM0_BUF0[0..0] <- OUT[0].x ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = insertelement <4 x float> undef, float %0, i32 0 %5 = insertelement <4 x float> %4, float %1, i32 1 %6 = insertelement <4 x float> %5, float %2, i32 2 %7 = insertelement <4 x float> %6, float %3, i32 3 call void @llvm.R600.store.stream.output(<4 x float> %7, i32 0, i32 0, i32 1) %8 = insertelement <4 x float> undef, float %0, i32 0 %9 = insertelement <4 x float> %8, float %1, i32 1 %10 = insertelement <4 x float> %9, float %2, i32 2 %11 = insertelement <4 x float> %10, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1) call void @llvm.R600.store.dummy(i32 2) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32) declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) declare void @llvm.R600.store.dummy(i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T1_W %T1_Z %T1_Y %T1_X CF_CALL_FS_R600 R600_ExportBuf %T1_XYZW, 0, 0, 4095, 1, 32, 0 R600_ExportSwz %T1_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T0_XYZW, 2, 0, 7, 7, 7, 7, 40, 1 CF_END_R600 PAD # End machine code for function main. ===== SHADER #9 ============================================ VS/RV670/R600 ===== ===== 12 dw ===== 2 gprs ===== 1 stack ========================================= 0000 00000000 89800000 CALL_FS @0 0002 00008000 90401fff MEM_STREAM0 WRITE 0 R1.x___ ES:0 VPM 0004 c000a03c 94400688 EXPORT_DONE POS 60 R1.xyzw VPM 0006 c0004000 94600fff EXPORT_DONE PARAM 0 R0.____ VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- FRAG DCL IN[0], GENERIC[0], CONSTANT 0: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 0) %1 = call float @llvm.R600.load.input(i32 1) %2 = call float @llvm.R600.load.input(i32 2) %3 = call float @llvm.R600.load.input(i32 3) call void @llvm.R600.store.dummy(i32 0) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.dummy(i32) attributes #0 = { "ShaderType"="0" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness BB#0: derived from LLVM BB %main_body R600_ExportSwz %T0_XYZW, 0, 0, 7, 7, 7, 7, 40, 1 CF_END_R600 # End machine code for function main. ===== SHADER #10 =========================================== PS/RV670/R600 ===== ===== 4 dw ===== 1 gprs ===== 1 stack ========================================== 0000 c0000000 94600fff EXPORT_DONE PIXEL 0 R0.____ VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, } {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, } ===== SHADER #11 ======================================== FETCH/RV670/R600 ===== ===== 12 dw ===== 3 gprs ===== 0 stack ========================================= 0000 00000002 81000400 VTX 2 @4 0004 7c00a000 88cd1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7c00a000 88cd1002 00080010 VFETCH R2.xyzw, R0.x + 16b , RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, } {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_SINT, } ===== SHADER #12 ======================================== FETCH/RV670/R600 ===== ===== 12 dw ===== 3 gprs ===== 0 stack ========================================= 0000 00000002 81000400 VTX 2 @4 0004 7c00a000 88cd1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7c00a000 d88d1002 00080010 VFETCH R2.xyzw, R0.x + 16b , RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:1 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, } {src_offset = 16, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_UINT, } ===== SHADER #13 ======================================== FETCH/RV670/R600 ===== ===== 12 dw ===== 3 gprs ===== 0 stack ========================================= 0000 00000002 81000400 VTX 2 @4 0004 7c00a000 88cd1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:35 NUM:0 COMP:0 MODE:1) 0008 7c00a000 988d1002 00080010 VFETCH R2.xyzw, R0.x + 16b , RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32_UINT, } ===== SHADER #14 ======================================== FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 93564001 00080000 VFETCH R1.x001, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:13 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32_UINT, } ===== SHADER #15 ======================================== FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 97561001 00080000 VFETCH R1.xy01, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:29 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_UINT, } ===== SHADER #16 ======================================== FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 9bd51001 00080000 VFETCH R1.xyz1, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:47 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_UINT, } ===== SHADER #17 ======================================== FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 988d1001 00080000 VFETCH R1.xyzw, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:34 NUM:1 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = call float @llvm.R600.load.input(i32 8) %5 = call float @llvm.R600.load.input(i32 9) %6 = call float @llvm.R600.load.input(i32 10) %7 = call float @llvm.R600.load.input(i32 11) %8 = insertelement <4 x float> undef, float %0, i32 0 %9 = insertelement <4 x float> %8, float %1, i32 1 %10 = insertelement <4 x float> %9, float %2, i32 2 %11 = insertelement <4 x float> %10, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1) %12 = insertelement <4 x float> undef, float %4, i32 0 %13 = insertelement <4 x float> %12, float %5, i32 1 %14 = insertelement <4 x float> %13, float %6, i32 2 %15 = insertelement <4 x float> %14, float %7, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %15, i32 0, i32 2) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T2_W in %vreg0, %T2_Z in %vreg1, %T2_Y in %vreg2, %T2_X in %vreg3, %T1_W in %vreg4, %T1_Z in %vreg5, %T1_Y in %vreg6, %T1_X in %vreg7 BB#0: derived from LLVM BB %main_body Live Ins: %T2_W %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X CF_CALL_FS_R600 R600_ExportSwz %T1_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T2_XYZW, 2, 0, 0, 1, 2, 3, 40, 1 CF_END_R600 # End machine code for function main. ===== SHADER #18 =========================================== VS/RV670/R600 ===== ===== 8 dw ===== 3 gprs ===== 1 stack ========================================== 0000 00000000 89800000 CALL_FS @0 0002 c000a03c 94400688 EXPORT_DONE POS 60 R1.xyzw VPM 0004 c0014000 94600688 EXPORT_DONE PARAM 0 R2.xyzw VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- VERT DCL IN[0] DCL OUT[0], POSITION 0: MOV OUT[0], IN[0] 1: END STREAMOUT 0: MEM_STREAM0_BUF0[0..0] <- OUT[0].x ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = insertelement <4 x float> undef, float %0, i32 0 %5 = insertelement <4 x float> %4, float %1, i32 1 %6 = insertelement <4 x float> %5, float %2, i32 2 %7 = insertelement <4 x float> %6, float %3, i32 3 call void @llvm.R600.store.stream.output(<4 x float> %7, i32 0, i32 0, i32 1) %8 = insertelement <4 x float> undef, float %0, i32 0 %9 = insertelement <4 x float> %8, float %1, i32 1 %10 = insertelement <4 x float> %9, float %2, i32 2 %11 = insertelement <4 x float> %10, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %11, i32 60, i32 1) call void @llvm.R600.store.dummy(i32 2) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32) declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) declare void @llvm.R600.store.dummy(i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T1_W %T1_Z %T1_Y %T1_X CF_CALL_FS_R600 R600_ExportBuf %T1_XYZW, 0, 0, 4095, 1, 32, 0 R600_ExportSwz %T1_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T0_XYZW, 2, 0, 7, 7, 7, 7, 40, 1 CF_END_R600 PAD # End machine code for function main. ===== SHADER #19 =========================================== VS/RV670/R600 ===== ===== 12 dw ===== 2 gprs ===== 1 stack ========================================= 0000 00000000 89800000 CALL_FS @0 0002 00008000 90401fff MEM_STREAM0 WRITE 0 R1.x___ ES:0 VPM 0004 c000a03c 94400688 EXPORT_DONE POS 60 R1.xyzw VPM 0006 c0004000 94600fff EXPORT_DONE PARAM 0 R0.____ VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- FRAG DCL IN[0], GENERIC[0], CONSTANT 0: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 0) %1 = call float @llvm.R600.load.input(i32 1) %2 = call float @llvm.R600.load.input(i32 2) %3 = call float @llvm.R600.load.input(i32 3) call void @llvm.R600.store.dummy(i32 0) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.dummy(i32) attributes #0 = { "ShaderType"="0" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness BB#0: derived from LLVM BB %main_body R600_ExportSwz %T0_XYZW, 0, 0, 7, 7, 7, 7, 40, 1 CF_END_R600 # End machine code for function main. ===== SHADER #20 =========================================== PS/RV670/R600 ===== ===== 4 dw ===== 1 gprs ===== 1 stack ========================================== 0000 c0000000 94600fff EXPORT_DONE PIXEL 0 R0.____ VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- Running synchronized to the vertical refresh. The framerate should be approximately the same as the monitor refresh rate. FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 0) %1 = call float @llvm.R600.load.input(i32 1) %2 = call float @llvm.R600.load.input(i32 2) %3 = call float @llvm.R600.load.input(i32 3) %4 = insertelement <4 x float> undef, float %0, i32 0 %5 = insertelement <4 x float> %4, float %1, i32 1 %6 = insertelement <4 x float> %5, float %2, i32 2 %7 = insertelement <4 x float> %6, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %7, i32 0, i32 0) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="0" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T0_W in %vreg0, %T0_Z in %vreg1, %T0_Y in %vreg2, %T0_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T0_W %T0_Z %T0_Y %T0_X R600_ExportSwz %T0_XYZW, 0, 0, 0, 1, 2, 3, 40, 1 CF_END_R600 # End machine code for function main. ===== SHADER #21 =========================================== PS/RV670/R600 ===== ===== 4 dw ===== 1 gprs ===== 1 stack ========================================== 0000 c0000000 94600688 EXPORT_DONE PIXEL 0 R0.xyzw VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..10] DCL TEMP[0..6] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: DP3 TEMP[1].x, IN[1], IN[1] 5: RSQ TEMP[1].x, TEMP[1] 6: MUL TEMP[0], IN[1], TEMP[1].xxxx 7: MOV TEMP[2].w, IN[6].xxxx 8: MOV TEMP[3], IN[3] 9: MAD TEMP[3].xyz, CONST[4], IN[2], IN[5] 10: MOV_SAT OUT[1], TEMP[3] 11: DP3 TEMP[2].x, TEMP[0], CONST[5] 12: DP3 TEMP[2].y, TEMP[0], CONST[7] 13: MUL TEMP[4], CONST[8], IN[2] 14: MUL TEMP[5], CONST[9], IN[3] 15: MUL TEMP[6], CONST[10], IN[4] 16: LIT TEMP[1], TEMP[2] 17: ADD TEMP[3], TEMP[4], TEMP[3] 18: MAD TEMP[3], TEMP[1].yyyy, TEMP[5], TEMP[3] 19: MAD_SAT OUT[1].xyz, TEMP[1].zzzz, TEMP[6], TEMP[3] 20: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = call float @llvm.R600.load.input(i32 8) %5 = call float @llvm.R600.load.input(i32 9) %6 = call float @llvm.R600.load.input(i32 10) %7 = call float @llvm.R600.load.input(i32 11) %8 = call float @llvm.R600.load.input(i32 12) %9 = call float @llvm.R600.load.input(i32 13) %10 = call float @llvm.R600.load.input(i32 14) %11 = call float @llvm.R600.load.input(i32 15) %12 = call float @llvm.R600.load.input(i32 16) %13 = call float @llvm.R600.load.input(i32 17) %14 = call float @llvm.R600.load.input(i32 18) %15 = call float @llvm.R600.load.input(i32 19) %16 = call float @llvm.R600.load.input(i32 20) %17 = call float @llvm.R600.load.input(i32 21) %18 = call float @llvm.R600.load.input(i32 22) %19 = call float @llvm.R600.load.input(i32 23) %20 = call float @llvm.R600.load.input(i32 24) %21 = call float @llvm.R600.load.input(i32 25) %22 = call float @llvm.R600.load.input(i32 26) %23 = call float @llvm.R600.load.input(i32 27) %24 = call float @llvm.R600.load.input(i32 28) %25 = call float @llvm.R600.load.input(i32 29) %26 = call float @llvm.R600.load.input(i32 30) %27 = call float @llvm.R600.load.input(i32 31) %28 = load <4 x float> addrspace(8)* null %29 = extractelement <4 x float> %28, i32 0 %30 = fmul float %0, %29 %31 = load <4 x float> addrspace(8)* null %32 = extractelement <4 x float> %31, i32 1 %33 = fmul float %0, %32 %34 = load <4 x float> addrspace(8)* null %35 = extractelement <4 x float> %34, i32 2 %36 = fmul float %0, %35 %37 = load <4 x float> addrspace(8)* null %38 = extractelement <4 x float> %37, i32 3 %39 = fmul float %0, %38 %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %41 = extractelement <4 x float> %40, i32 0 %42 = fmul float %1, %41 %43 = fadd float %42, %30 %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %45 = extractelement <4 x float> %44, i32 1 %46 = fmul float %1, %45 %47 = fadd float %46, %33 %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %49 = extractelement <4 x float> %48, i32 2 %50 = fmul float %1, %49 %51 = fadd float %50, %36 %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %53 = extractelement <4 x float> %52, i32 3 %54 = fmul float %1, %53 %55 = fadd float %54, %39 %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %57 = extractelement <4 x float> %56, i32 0 %58 = fmul float %2, %57 %59 = fadd float %58, %43 %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %61 = extractelement <4 x float> %60, i32 1 %62 = fmul float %2, %61 %63 = fadd float %62, %47 %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %65 = extractelement <4 x float> %64, i32 2 %66 = fmul float %2, %65 %67 = fadd float %66, %51 %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %69 = extractelement <4 x float> %68, i32 3 %70 = fmul float %2, %69 %71 = fadd float %70, %55 %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %73 = extractelement <4 x float> %72, i32 0 %74 = fmul float %3, %73 %75 = fadd float %74, %59 %76 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %77 = extractelement <4 x float> %76, i32 1 %78 = fmul float %3, %77 %79 = fadd float %78, %63 %80 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %81 = extractelement <4 x float> %80, i32 2 %82 = fmul float %3, %81 %83 = fadd float %82, %67 %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %85 = extractelement <4 x float> %84, i32 3 %86 = fmul float %3, %85 %87 = fadd float %86, %71 %88 = insertelement <4 x float> undef, float %4, i32 0 %89 = insertelement <4 x float> %88, float %5, i32 1 %90 = insertelement <4 x float> %89, float %6, i32 2 %91 = insertelement <4 x float> %90, float 0.000000e+00, i32 3 %92 = insertelement <4 x float> undef, float %4, i32 0 %93 = insertelement <4 x float> %92, float %5, i32 1 %94 = insertelement <4 x float> %93, float %6, i32 2 %95 = insertelement <4 x float> %94, float 0.000000e+00, i32 3 %96 = call float @llvm.AMDGPU.dp4(<4 x float> %91, <4 x float> %95) %97 = call float @fabs(float %96) %98 = call float @llvm.AMDGPU.rsq(float %97) %99 = fmul float %4, %98 %100 = fmul float %5, %98 %101 = fmul float %6, %98 %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %103 = extractelement <4 x float> %102, i32 0 %104 = fmul float %103, %8 %105 = fadd float %104, %20 %106 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %107 = extractelement <4 x float> %106, i32 1 %108 = fmul float %107, %9 %109 = fadd float %108, %21 %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %111 = extractelement <4 x float> %110, i32 2 %112 = fmul float %111, %10 %113 = fadd float %112, %22 %114 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00) %115 = call float @llvm.AMDIL.clamp.(float %109, float 0.000000e+00, float 1.000000e+00) %116 = call float @llvm.AMDIL.clamp.(float %113, float 0.000000e+00, float 1.000000e+00) %117 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00) %118 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %119 = extractelement <4 x float> %118, i32 0 %120 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %121 = extractelement <4 x float> %120, i32 1 %122 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %123 = extractelement <4 x float> %122, i32 2 %124 = insertelement <4 x float> undef, float %99, i32 0 %125 = insertelement <4 x float> %124, float %100, i32 1 %126 = insertelement <4 x float> %125, float %101, i32 2 %127 = insertelement <4 x float> %126, float 0.000000e+00, i32 3 %128 = insertelement <4 x float> undef, float %119, i32 0 %129 = insertelement <4 x float> %128, float %121, i32 1 %130 = insertelement <4 x float> %129, float %123, i32 2 %131 = insertelement <4 x float> %130, float 0.000000e+00, i32 3 %132 = call float @llvm.AMDGPU.dp4(<4 x float> %127, <4 x float> %131) %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) %134 = extractelement <4 x float> %133, i32 0 %135 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) %136 = extractelement <4 x float> %135, i32 1 %137 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) %138 = extractelement <4 x float> %137, i32 2 %139 = insertelement <4 x float> undef, float %99, i32 0 %140 = insertelement <4 x float> %139, float %100, i32 1 %141 = insertelement <4 x float> %140, float %101, i32 2 %142 = insertelement <4 x float> %141, float 0.000000e+00, i32 3 %143 = insertelement <4 x float> undef, float %134, i32 0 %144 = insertelement <4 x float> %143, float %136, i32 1 %145 = insertelement <4 x float> %144, float %138, i32 2 %146 = insertelement <4 x float> %145, float 0.000000e+00, i32 3 %147 = call float @llvm.AMDGPU.dp4(<4 x float> %142, <4 x float> %146) %148 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %149 = extractelement <4 x float> %148, i32 0 %150 = fmul float %149, %8 %151 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %152 = extractelement <4 x float> %151, i32 1 %153 = fmul float %152, %9 %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %155 = extractelement <4 x float> %154, i32 2 %156 = fmul float %155, %10 %157 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %158 = extractelement <4 x float> %157, i32 0 %159 = fmul float %158, %12 %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %161 = extractelement <4 x float> %160, i32 1 %162 = fmul float %161, %13 %163 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %164 = extractelement <4 x float> %163, i32 2 %165 = fmul float %164, %14 %166 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %167 = extractelement <4 x float> %166, i32 0 %168 = fmul float %167, %16 %169 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %170 = extractelement <4 x float> %169, i32 1 %171 = fmul float %170, %17 %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %173 = extractelement <4 x float> %172, i32 2 %174 = fmul float %173, %18 %175 = fcmp uge float %132, 0.000000e+00 %176 = select i1 %175, float %132, float 0.000000e+00 %177 = fcmp uge float %147, 0.000000e+00 %178 = select i1 %177, float %147, float 0.000000e+00 %179 = call float @llvm.pow.f32(float %178, float %24) %180 = fcmp ult float %132, 0.000000e+00 %181 = select i1 %180, float 0.000000e+00, float %179 %182 = fadd float %150, %105 %183 = fadd float %153, %109 %184 = fadd float %156, %113 %185 = fmul float %176, %159 %186 = fadd float %185, %182 %187 = fmul float %176, %162 %188 = fadd float %187, %183 %189 = fmul float %176, %165 %190 = fadd float %189, %184 %191 = fmul float %181, %168 %192 = fadd float %191, %186 %193 = fmul float %181, %171 %194 = fadd float %193, %188 %195 = fmul float %181, %174 %196 = fadd float %195, %190 %197 = call float @llvm.AMDIL.clamp.(float %192, float 0.000000e+00, float 1.000000e+00) %198 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00) %199 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00) %200 = insertelement <4 x float> undef, float %75, i32 0 %201 = insertelement <4 x float> %200, float %79, i32 1 %202 = insertelement <4 x float> %201, float %83, i32 2 %203 = insertelement <4 x float> %202, float %87, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %203, i32 60, i32 1) %204 = insertelement <4 x float> undef, float %197, i32 0 %205 = insertelement <4 x float> %204, float %198, i32 1 %206 = insertelement <4 x float> %205, float %199, i32 2 %207 = insertelement <4 x float> %206, float %117, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %207, i32 0, i32 2) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #1 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #3 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } attributes #2 = { readonly } attributes #3 = { nounwind readonly } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T7_X in %vreg0, %T6_Z in %vreg1, %T6_Y in %vreg2, %T6_X in %vreg3, %T5_Z in %vreg4, %T5_Y in %vreg5, %T5_X in %vreg6, %T4_W in %vreg7, %T4_Z in %vreg8, %T4_Y in %vreg9, %T4_X in %vreg10, %T3_Z in %vreg11, %T3_Y in %vreg12, %T3_X in %vreg13, %T2_Z in %vreg14, %T2_Y in %vreg15, %T2_X in %vreg16, %T1_W in %vreg17, %T1_Z in %vreg18, %T1_Y in %vreg19, %T1_X in %vreg20 BB#0: derived from LLVM BB %main_body Live Ins: %T7_X %T6_Z %T6_Y %T6_X %T5_Z %T5_Y %T5_X %T4_W %T4_Z %T4_Y %T4_X %T3_Z %T3_Y %T3_X %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X CF_CALL_FS_R600 CF_ALU 0, 0, 0, 2, 0, 0, 0, 67 BUNDLE %T0_X, %T0_Y, %T0_Z, %T2_W, %T2_XYZW, %T2_X, %T2_Y, %T2_Z, %T1_X, %KC0_128_X, %PRED_SEL_OFF, %KC0_128_Y, %KC0_128_Z, %ALU_LITERAL_X, %T2_XYZW * %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_X, 0, 0, 0, 2048, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Y, 0, 0, 0, 2049, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Z, 0, 0, 0, 2050, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_W = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW, %T2_XYZW BUNDLE %T0_X, %T0_Y, %T0_Z, %T0_W, %T2_X, %PRED_SEL_OFF, %T2_Y, %T2_Z, %T2_W * %T0_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T2_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T2_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_W = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T8_X = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %T0_W, 0, 0, 1, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T7_Y, %T7_Z, %T0_W, %T1_Y, %KC0_129_X, %T0_X, %PRED_SEL_OFF, %KC0_129_Y, %T0_Y, %KC0_129_Z, %T0_Z * %T7_Y = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_X, 0, 0, 2052, %T0_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T7_Z = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Y, 0, 0, 2053, %T0_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_W = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Z, 0, 0, 2054, %T0_Z, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T0_X, %T7_Y, %T2_Z, %T2_XYZW, %T2_X, %T2_Y, %T2_W, %T5_W, %T1_Z, %KC0_130_X, %T7_Y, %PRED_SEL_OFF, %KC0_130_Y, %T7_Z, %T2_Z, %T8_X, %T2_XYZW, %KC0_130_Z, %T0_W * %T0_X = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_X, 0, 0, 2056, %T7_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T7_Y = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Y, 0, 0, 2057, %T7_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T8_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 2, %T2_XYZW, %T2_XYZW * %T5_W = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Z, 0, 0, 2058, %T0_W, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T0_X, %T0_XYZW, %T0_Y, %T0_Z, %T0_W, %T2_Y, %T2_XYZW, %T2_X, %T2_Z, %T2_W, %T7_Z, %T1_W, %KC0_131_X, %T0_X, %PRED_SEL_OFF, %T2_Y, %T8_X, %T2_XYZW, %KC0_137_Z, %T4_Z * %T0_X = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_X, 0, 0, 2060, %T0_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW * %T2_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T8_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW, %T2_XYZW * %T7_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %T4_Z, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T2_X, %T2_XYZW, %T2_Y, %T2_Z, %T2_W, %T5_Y, %T4_Z, %T3_W, %T2_X, %T8_X, %PRED_SEL_OFF, %T2_XYZW, %KC0_138_Y, %T5_Y, %KC0_138_Z, %T5_Z, %KC0_138_X, %T5_X * %T2_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T8_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW, %T2_XYZW * %T5_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %T5_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T4_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %T5_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T3_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %T5_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 1 BUNDLE %T5_X, %T6_Y, %T5_Z, %KC0_132_X, %T3_X, %T6_X, %PRED_SEL_OFF, %KC0_132_Y, %T3_Y, %T6_Y, %KC0_132_Z, %T3_Z, %T6_Z * %T5_X = MULADD_IEEE_r600 0, 0, %KC0_132_X, 0, 0, 2064, %T3_X, 0, 0, -1, %T6_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T6_Y = MULADD_IEEE_r600 0, 0, %KC0_132_Y, 0, 0, 2065, %T3_Y, 0, 0, -1, %T6_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T5_Z = MULADD_IEEE_r600 0, 0, %KC0_132_Z, 0, 0, 2066, %T3_Z, 0, 0, -1, %T6_Z, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T3_X, %T6_Y, %T3_Z, %KC0_136_X, %T3_X, %T5_X, %PRED_SEL_OFF, %KC0_136_Y, %T3_Y, %T6_Y, %KC0_136_Z, %T3_Z, %T5_Z * %T3_X = MULADD_IEEE_r600 0, 0, %KC0_136_X, 0, 0, 2080, %T3_X, 0, 0, -1, %T5_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T6_Y = MULADD_IEEE_r600 0, 0, %KC0_136_Y, 0, 0, 2081, %T3_Y, 0, 0, -1, %T6_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T3_Z = MULADD_IEEE_r600 0, 0, %KC0_136_Z, 0, 0, 2082, %T3_Z, 0, 0, -1, %T5_Z, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T1_X, %T3_Y, %T5_Z, %T1_X, %KC0_128_W, %PRED_SEL_OFF, %KC0_137_X, %T4_X, %KC0_137_Y, %T4_Y * %T1_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_W, 0, 0, 0, 2051, 0, pred:%PRED_SEL_OFF, 0, 0 * %T3_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %T4_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T5_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %T4_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T1_X, %T9_Z, %T9_XYZW, %T9_X, %T9_Y, %T9_W, %T1_Y, %KC0_129_W, %PV_X, %PRED_SEL_OFF, %KC0_135_Z * %T1_X = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_W, 0, 0, 2055, %PV_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T9_Z = MOV 1, 0, 0, 0, %KC0_135_Z, 0, 0, 0, 2078, 1, pred:%PRED_SEL_OFF, 0, 0, %T9_XYZW BUNDLE %T1_X, %T9_Y, %T9_XYZW, %T9_X, %T9_Z, %T9_W, %T1_Z, %KC0_130_W, %T1_X, %PRED_SEL_OFF, %KC0_135_Y, %T9_XYZW * %T1_X = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_W, 0, 0, 2059, %T1_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T9_Y = MOV 1, 0, 0, 0, %KC0_135_Y, 0, 0, 0, 2077, 1, pred:%PRED_SEL_OFF, 0, 0, %T9_XYZW, %T9_XYZW BUNDLE %T9_X, %T9_XYZW, %T9_Y, %T9_Z, %T9_W, %T8_Z, %T8_XYZW, %T8_X, %T8_Y, %T8_W, %KC0_135_X, %PRED_SEL_OFF, %T9_XYZW, %KC0_133_Z * %T9_X = MOV 1, 0, 0, 0, %KC0_135_X, 0, 0, 0, 2076, 0, pred:%PRED_SEL_OFF, 0, 0, %T9_XYZW, %T9_XYZW * %T8_Z = MOV 1, 0, 0, 0, %KC0_133_Z, 0, 0, 0, 2070, 1, pred:%PRED_SEL_OFF, 0, 0, %T8_XYZW BUNDLE %T8_Y, %T8_XYZW, %T8_X, %T8_Z, %T8_W, %T9_W, %T9_XYZW, %T9_X, %T9_Y, %T9_Z, %KC0_133_Y, %PRED_SEL_OFF, %T8_XYZW, %T2_W * %T8_Y = MOV 1, 0, 0, 0, %KC0_133_Y, 0, 0, 0, 2069, 0, pred:%PRED_SEL_OFF, 0, 0, %T8_XYZW, %T8_XYZW * %T9_W = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T9_XYZW BUNDLE %T1_X, %T1_Y, %T1_Z, %T1_W, %T2_X, %T9_X, %PRED_SEL_OFF, %T2_Y, %T9_Y, %T2_Z, %T9_Z, %T2_W, %T9_W * %T1_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T9_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Y = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T9_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T9_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T9_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T4_X, %T0_W, %T0_XYZW, %T0_X, %T0_Y, %T0_Z, %PV_Y, %ZERO, %PRED_SEL_OFF, %T1_W, %KC0_131_W, %T1_X, %T0_XYZW * %T4_X = CNDGE_r600 0, 0, %PV_Y, 0, 0, -1, %PV_Y, 0, 0, -1, %ZERO, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_W = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_W, 0, 0, 2063, %T1_X, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW %T1_X = LOG_IEEE_r600 1, 0, 0, 0, %T4_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T0_Z = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Z, 0, 0, 2062, %T5_W, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW BUNDLE %T1_X, %T0_Y, %T0_XYZW, %T0_X, %T0_Z, %T0_W, %T7_X, %T1_X, %PRED_SEL_OFF, %T1_W, %KC0_131_Y, %T7_Y, %T0_XYZW * %T1_X = MUL 0, 0, 1, 0, 0, 0, %T7_X, 0, 0, 0, -1, %T1_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Y = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Y, 0, 0, 2061, %T7_Y, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW %T1_X = EXP_IEEE_r600 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T8_X, %T8_XYZW, %T8_Y, %T8_Z, %T8_W, %KC0_133_X, %PRED_SEL_OFF, %T8_XYZW, %T2_W * %T8_X = MOV 1, 0, 0, 0, %KC0_133_X, 0, 0, 0, 2068, 0, pred:%PRED_SEL_OFF, 0, 0, %T8_XYZW, %T8_XYZW * %T8_W = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T8_XYZW BUNDLE %T1_X, %T1_Y, %T1_Z, %T1_W, %T2_X, %T8_X, %PRED_SEL_OFF, %T2_Y, %T8_Y, %T2_Z, %T8_Z, %T2_W, %T8_W * %T1_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T8_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Y = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T8_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T8_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T8_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T1_X, %T2_Y, %PV_Y, %T1_X, %ZERO, %PRED_SEL_OFF, %T1_Y * %T1_X = CNDGE_r600 0, 0, %PV_Y, 0, 0, -1, %T1_X, 0, 0, -1, %ZERO, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Y = CNDGE_r600 0, 0, %T1_Y, 0, 0, -1, %T1_Y, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T2_X, %T1_Y, %T2_Y, %T7_Z, %T3_Z, %PRED_SEL_OFF, %T5_Z, %T6_Y * %T2_X = MULADD_IEEE_r600 0, 0, %T2_Y, 0, 0, -1, %T7_Z, 0, 0, -1, %T3_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Y = MULADD_IEEE_r600 0, 0, %T2_Y, 0, 0, -1, %T5_Z, 0, 0, -1, %T6_Y, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 3 %T1_Z = MULADD_IEEE_r600 0, 0, %T2_Y, 0, 0, -1, %T3_Y, 0, 0, -1, %T3_X, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T5_Y, %T5_XYZW, %T5_X, %T5_Z, %T5_W, %T1_X, %T5_Y, %T1_Y, %PRED_SEL_OFF, %T4_Z, %T2_X * %T5_Y = MULADD_IEEE_r600 0, 1, %T1_X, 0, 0, -1, %T5_Y, 0, 0, -1, %T1_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW * %T5_Z = MULADD_IEEE_r600 0, 1, %T1_X, 0, 0, -1, %T4_Z, 0, 0, -1, %T2_X, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW, %T5_XYZW BUNDLE %T5_X, %T5_XYZW, %T5_Y, %T5_Z, %T5_W, %T1_X, %T3_W, %T1_Z, %PRED_SEL_OFF, %T5_XYZW, %T4_W * %T5_X = MULADD_IEEE_r600 0, 1, %T1_X, 0, 0, -1, %T3_W, 0, 0, -1, %T1_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW, %T5_XYZW * %T5_W = MOV 1, 0, 0, 1, %T4_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW, %T5_XYZW R600_ExportSwz %T0_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T5_XYZW, 2, 0, 0, 1, 2, 3, 40, 1 CF_END_R600 PAD # End machine code for function main. ===== SHADER #22 =========================================== VS/RV670/R600 ===== ===== 144 dw ===== 10 gprs ===== 1 stack ======================================= 0000 00000000 89800000 CALL_FS @0 0002 80000006 a1040000 ALU 66 @12 KC0[CB0:0-31] 0012 00100001 00000210 1 x: MUL_IEEE R0.x, R1.x, KC0[0].x 0014 00900001 20000210 y: MUL_IEEE R0.y, R1.x, KC0[0].y 0016 01100001 40000210 z: MUL_IEEE R0.z, R1.x, KC0[0].z 0018 800000f8 60401910 w: MOV R2.w, 0 0020 00004002 00005000 2 x: DOT4 __.x, R2.x, R2.x 0022 00804402 20005000 y: DOT4 __.y, R2.y, R2.y 0024 01004802 40005000 z: DOT4 __.z, R2.z, R2.z 0026 819fccfe 60005010 w: DOT4 R0.w, PV.w, PV.w 0028 00102401 20e28000 3 y: MULADD_IEEE R7.y, R1.y, KC0[1].x, R0.x 0030 00902401 40e28400 z: MULADD_IEEE R7.z, R1.y, KC0[1].y, R0.y 0032 01102401 60028800 w: MULADD_IEEE R0.w, R1.y, KC0[1].z, R0.z 0034 800000fe 01006711 t: RECIPSQRT_CLAMPED R8.x, |PV.x| 0036 00104801 000284fe 4 x: MULADD_IEEE R0.x, R1.z, KC0[2].x, PV.y 0038 00904801 20e288fe y: MULADD_IEEE R7.y, R1.z, KC0[2].y, PV.z 0040 001fe802 40480210 z: MUL_IEEE R2.z, R2.z, PS VEC_120 0042 81104801 60a28cfe w: MULADD_IEEE R5.w, R1.z, KC0[2].z, PV.w 0044 00106c01 000280fe 5 x: MULADD_IEEE R0.x, R1.w, KC0[3].x, PV.x 0046 00010402 20400210 y: MUL_IEEE R2.y, R2.y, R8.x 0048 81008889 40e00210 z: MUL_IEEE R7.z, KC0[9].z, R4.z 0050 00010002 00400210 6 x: MUL_IEEE R2.x, R2.x, R8.x 0052 0080a48a 20a00210 y: MUL_IEEE R5.y, KC0[10].y, R5.y 0054 0100a88a 40800210 z: MUL_IEEE R4.z, KC0[10].z, R5.z 0056 8000a08a 60640210 w: MUL_IEEE R3.w, KC0[10].x, R5.x VEC_021 0058 00006084 00a28006 7 x: MULADD_IEEE R5.x, KC0[4].x, R3.x, R6.x 0060 00806484 20c28406 y: MULADD_IEEE R6.y, KC0[4].y, R3.y, R6.y 0062 81006884 40a28806 z: MULADD_IEEE R5.z, KC0[4].z, R3.z, R6.z 0064 00006088 006280fe 8 x: MULADD_IEEE R3.x, KC0[8].x, R3.x, PV.x 0066 00806488 20c284fe y: MULADD_IEEE R6.y, KC0[8].y, R3.y, PV.y 0068 81006888 406288fe z: MULADD_IEEE R3.z, KC0[8].z, R3.z, PV.z 0070 01900001 00200210 9 x: MUL_IEEE R1.x, R1.x, KC0[0].w 0072 00008089 20600210 y: MUL_IEEE R3.y, KC0[9].x, R4.x 0074 80808489 40a00210 z: MUL_IEEE R5.z, KC0[9].y, R4.y 0076 01902401 002280fe 10 x: MULADD_IEEE R1.x, R1.y, KC0[1].w, PV.x 0078 80000887 41201910 z: MOV R9.z, KC0[7].z 0080 01904801 002280fe 11 x: MULADD_IEEE R1.x, R1.z, KC0[2].w, PV.x 0082 00000487 21201910 y: MOV R9.y, KC0[7].y 0084 00000885 41001910 z: MOV R8.z, KC0[5].z 0086 80000087 01201910 t: MOV R9.x, KC0[7].x 0088 00000485 21001910 12 y: MOV R8.y, KC0[5].y 0090 80000c02 61201910 w: MOV R9.w, R2.w 0092 00012002 00205000 13 x: DOT4 __.x, R2.x, R9.x 0094 00812402 20205010 y: DOT4 R1.y, R2.y, R9.y 0096 01012802 40205000 z: DOT4 __.z, R2.z, R9.z 0098 819fcc02 60205000 w: DOT4 __.w, R2.w, PV.w 0100 009fc4fe 008340f8 14 x: CNDGE R4.x, PV.y, PV.y, 0 0102 81906c01 60028001 w: MULADD_IEEE R0.w, R1.w, KC0[3].w, R1.x 0104 01106c01 40028c05 15 z: MULADD_IEEE R0.z, R1.w, KC0[3].z, R5.w 0106 800000fe 00206310 t: LOG_IEEE R1.x, PV.x 0108 001fe007 00200110 16 x: MUL R1.x, R7.x, PS 0110 80906c01 20028407 y: MULADD_IEEE R0.y, R1.w, KC0[3].y, R7.y 0112 00000085 01001910 17 x: MOV R8.x, KC0[5].x 0114 00000c02 61001910 w: MOV R8.w, R2.w 0116 800000fe 00206110 t: EXP_IEEE R1.x, PV.x 0118 001fc002 00205000 18 x: DOT4 __.x, R2.x, PV.x 0120 00810402 20205010 y: DOT4 R1.y, R2.y, R8.y 0122 01010802 40205000 z: DOT4 __.z, R2.z, R8.z 0124 819fcc02 60205000 w: DOT4 __.w, R2.w, PV.w 0126 000024fe 002340f8 19 x: CNDGE R1.x, PV.y, R1.x, 0 0128 801fc0fe 204340f8 y: CNDGE R2.y, PV.x, PV.x, 0 0130 0100e4fe 00428803 20 x: MULADD_IEEE R2.x, PV.y, R7.z, R3.z 0132 0100a4fe 202e8406 y: MULADD_IEEE R1.y, PV.y, R5.z, R6.y VEC_102 0134 808064fe 40228003 z: MULADD_IEEE R1.z, PV.y, R3.y, R3.x 0136 01806001 80a288fe 21 x: MULADD_IEEE_sat R5.x, R1.x, R3.w, PV.z 0138 0080a001 a0a284fe y: MULADD_IEEE_sat R5.y, R1.x, R5.y, PV.y 0140 01008001 c0a280fe z: MULADD_IEEE_sat R5.z, R1.x, R4.z, PV.x 0142 80000c04 e0a01910 w: MOV_sat R5.w, R4.w 0004 c000203c 94400688 EXPORT_DONE POS 60 R0.xyzw VPM 0006 c002c000 94600688 EXPORT_DONE PARAM 0 R5.xyzw VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 0) %1 = call float @llvm.R600.load.input(i32 1) %2 = call float @llvm.R600.load.input(i32 2) %3 = call float @llvm.R600.load.input(i32 3) %4 = insertelement <4 x float> undef, float %0, i32 0 %5 = insertelement <4 x float> %4, float %1, i32 1 %6 = insertelement <4 x float> %5, float %2, i32 2 %7 = insertelement <4 x float> %6, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %7, i32 0, i32 0) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="0" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T0_W in %vreg0, %T0_Z in %vreg1, %T0_Y in %vreg2, %T0_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T0_W %T0_Z %T0_Y %T0_X R600_ExportSwz %T0_XYZW, 0, 0, 0, 1, 2, 3, 40, 1 CF_END_R600 # End machine code for function main. ===== SHADER #23 =========================================== PS/RV670/R600 ===== ===== 4 dw ===== 1 gprs ===== 1 stack ========================================== 0000 c0000000 94600688 EXPORT_DONE PIXEL 0 R0.xyzw VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..12] DCL TEMP[0..3] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: DP3 TEMP[1].x, CONST[4], CONST[4] 5: RSQ TEMP[1].x, TEMP[1] 6: MUL TEMP[0], CONST[4], TEMP[1].xxxx 7: MOV TEMP[2].w, CONST[5].xxxx 8: MOV TEMP[3], CONST[6] 9: MOV_SAT OUT[1], TEMP[3] 10: DP3 TEMP[2].x, TEMP[0], CONST[7] 11: DP3 TEMP[2].y, TEMP[0], CONST[9] 12: LIT TEMP[1], TEMP[2] 13: ADD TEMP[3], CONST[10], TEMP[3] 14: MAD TEMP[3], TEMP[1].yyyy, CONST[11], TEMP[3] 15: MAD_SAT OUT[1].xyz, TEMP[1].zzzz, CONST[12], TEMP[3] 16: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = load <4 x float> addrspace(8)* null %5 = extractelement <4 x float> %4, i32 0 %6 = fmul float %0, %5 %7 = load <4 x float> addrspace(8)* null %8 = extractelement <4 x float> %7, i32 1 %9 = fmul float %0, %8 %10 = load <4 x float> addrspace(8)* null %11 = extractelement <4 x float> %10, i32 2 %12 = fmul float %0, %11 %13 = load <4 x float> addrspace(8)* null %14 = extractelement <4 x float> %13, i32 3 %15 = fmul float %0, %14 %16 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %17 = extractelement <4 x float> %16, i32 0 %18 = fmul float %1, %17 %19 = fadd float %18, %6 %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %21 = extractelement <4 x float> %20, i32 1 %22 = fmul float %1, %21 %23 = fadd float %22, %9 %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %25 = extractelement <4 x float> %24, i32 2 %26 = fmul float %1, %25 %27 = fadd float %26, %12 %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %29 = extractelement <4 x float> %28, i32 3 %30 = fmul float %1, %29 %31 = fadd float %30, %15 %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = fmul float %2, %33 %35 = fadd float %34, %19 %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %37 = extractelement <4 x float> %36, i32 1 %38 = fmul float %2, %37 %39 = fadd float %38, %23 %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %41 = extractelement <4 x float> %40, i32 2 %42 = fmul float %2, %41 %43 = fadd float %42, %27 %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %45 = extractelement <4 x float> %44, i32 3 %46 = fmul float %2, %45 %47 = fadd float %46, %31 %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %49 = extractelement <4 x float> %48, i32 0 %50 = fmul float %3, %49 %51 = fadd float %50, %35 %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %53 = extractelement <4 x float> %52, i32 1 %54 = fmul float %3, %53 %55 = fadd float %54, %39 %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %57 = extractelement <4 x float> %56, i32 2 %58 = fmul float %3, %57 %59 = fadd float %58, %43 %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %61 = extractelement <4 x float> %60, i32 3 %62 = fmul float %3, %61 %63 = fadd float %62, %47 %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %65 = extractelement <4 x float> %64, i32 0 %66 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %67 = extractelement <4 x float> %66, i32 0 %68 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %69 = extractelement <4 x float> %68, i32 1 %70 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %71 = extractelement <4 x float> %70, i32 1 %72 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %73 = extractelement <4 x float> %72, i32 2 %74 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %75 = extractelement <4 x float> %74, i32 2 %76 = insertelement <4 x float> undef, float %65, i32 0 %77 = insertelement <4 x float> %76, float %69, i32 1 %78 = insertelement <4 x float> %77, float %73, i32 2 %79 = insertelement <4 x float> %78, float 0.000000e+00, i32 3 %80 = insertelement <4 x float> undef, float %67, i32 0 %81 = insertelement <4 x float> %80, float %71, i32 1 %82 = insertelement <4 x float> %81, float %75, i32 2 %83 = insertelement <4 x float> %82, float 0.000000e+00, i32 3 %84 = call float @llvm.AMDGPU.dp4(<4 x float> %79, <4 x float> %83) %85 = call float @fabs(float %84) %86 = call float @llvm.AMDGPU.rsq(float %85) %87 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %88 = extractelement <4 x float> %87, i32 0 %89 = fmul float %88, %86 %90 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %91 = extractelement <4 x float> %90, i32 1 %92 = fmul float %91, %86 %93 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %94 = extractelement <4 x float> %93, i32 2 %95 = fmul float %94, %86 %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %97 = extractelement <4 x float> %96, i32 0 %98 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %99 = extractelement <4 x float> %98, i32 0 %100 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %101 = extractelement <4 x float> %100, i32 1 %102 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %103 = extractelement <4 x float> %102, i32 2 %104 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %105 = extractelement <4 x float> %104, i32 3 %106 = call float @llvm.AMDIL.clamp.(float %99, float 0.000000e+00, float 1.000000e+00) %107 = call float @llvm.AMDIL.clamp.(float %101, float 0.000000e+00, float 1.000000e+00) %108 = call float @llvm.AMDIL.clamp.(float %103, float 0.000000e+00, float 1.000000e+00) %109 = call float @llvm.AMDIL.clamp.(float %105, float 0.000000e+00, float 1.000000e+00) %110 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) %111 = extractelement <4 x float> %110, i32 0 %112 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) %113 = extractelement <4 x float> %112, i32 1 %114 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) %115 = extractelement <4 x float> %114, i32 2 %116 = insertelement <4 x float> undef, float %89, i32 0 %117 = insertelement <4 x float> %116, float %92, i32 1 %118 = insertelement <4 x float> %117, float %95, i32 2 %119 = insertelement <4 x float> %118, float 0.000000e+00, i32 3 %120 = insertelement <4 x float> undef, float %111, i32 0 %121 = insertelement <4 x float> %120, float %113, i32 1 %122 = insertelement <4 x float> %121, float %115, i32 2 %123 = insertelement <4 x float> %122, float 0.000000e+00, i32 3 %124 = call float @llvm.AMDGPU.dp4(<4 x float> %119, <4 x float> %123) %125 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %126 = extractelement <4 x float> %125, i32 0 %127 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %128 = extractelement <4 x float> %127, i32 1 %129 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %130 = extractelement <4 x float> %129, i32 2 %131 = insertelement <4 x float> undef, float %89, i32 0 %132 = insertelement <4 x float> %131, float %92, i32 1 %133 = insertelement <4 x float> %132, float %95, i32 2 %134 = insertelement <4 x float> %133, float 0.000000e+00, i32 3 %135 = insertelement <4 x float> undef, float %126, i32 0 %136 = insertelement <4 x float> %135, float %128, i32 1 %137 = insertelement <4 x float> %136, float %130, i32 2 %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 %139 = call float @llvm.AMDGPU.dp4(<4 x float> %134, <4 x float> %138) %140 = fcmp uge float %124, 0.000000e+00 %141 = select i1 %140, float %124, float 0.000000e+00 %142 = fcmp uge float %139, 0.000000e+00 %143 = select i1 %142, float %139, float 0.000000e+00 %144 = call float @llvm.pow.f32(float %143, float %97) %145 = fcmp ult float %124, 0.000000e+00 %146 = select i1 %145, float 0.000000e+00, float %144 %147 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %148 = extractelement <4 x float> %147, i32 0 %149 = fadd float %148, %99 %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %151 = extractelement <4 x float> %150, i32 1 %152 = fadd float %151, %101 %153 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %154 = extractelement <4 x float> %153, i32 2 %155 = fadd float %154, %103 %156 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %157 = extractelement <4 x float> %156, i32 0 %158 = fmul float %141, %157 %159 = fadd float %158, %149 %160 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %161 = extractelement <4 x float> %160, i32 1 %162 = fmul float %141, %161 %163 = fadd float %162, %152 %164 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %165 = extractelement <4 x float> %164, i32 2 %166 = fmul float %141, %165 %167 = fadd float %166, %155 %168 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) %169 = extractelement <4 x float> %168, i32 0 %170 = fmul float %146, %169 %171 = fadd float %170, %159 %172 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) %173 = extractelement <4 x float> %172, i32 1 %174 = fmul float %146, %173 %175 = fadd float %174, %163 %176 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12) %177 = extractelement <4 x float> %176, i32 2 %178 = fmul float %146, %177 %179 = fadd float %178, %167 %180 = call float @llvm.AMDIL.clamp.(float %171, float 0.000000e+00, float 1.000000e+00) %181 = call float @llvm.AMDIL.clamp.(float %175, float 0.000000e+00, float 1.000000e+00) %182 = call float @llvm.AMDIL.clamp.(float %179, float 0.000000e+00, float 1.000000e+00) %183 = insertelement <4 x float> undef, float %51, i32 0 %184 = insertelement <4 x float> %183, float %55, i32 1 %185 = insertelement <4 x float> %184, float %59, i32 2 %186 = insertelement <4 x float> %185, float %63, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %186, i32 60, i32 1) %187 = insertelement <4 x float> undef, float %180, i32 0 %188 = insertelement <4 x float> %187, float %181, i32 1 %189 = insertelement <4 x float> %188, float %182, i32 2 %190 = insertelement <4 x float> %189, float %109, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %190, i32 0, i32 2) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #1 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #3 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } attributes #2 = { readonly } attributes #3 = { nounwind readonly } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T1_W in %vreg0, %T1_Z in %vreg1, %T1_Y in %vreg2, %T1_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T1_W %T1_Z %T1_Y %T1_X CF_CALL_FS_R600 CF_ALU 0, 0, 0, 2, 0, 0, 0, 63 BUNDLE %T0_X, %T0_Y, %T0_Z, %T0_W, %T1_X, %KC0_128_X, %PRED_SEL_OFF, %KC0_128_Y, %KC0_128_Z, %KC0_128_W * %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_X, 0, 0, 0, 2048, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Y, 0, 0, 0, 2049, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Z, 0, 0, 0, 2050, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_W = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_W, 0, 0, 0, 2051, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T0_X, %T0_Y, %T0_Z, %T0_W, %T1_Y, %KC0_129_X, %PV_X, %PRED_SEL_OFF, %KC0_129_Y, %T0_Y, %KC0_129_Z, %T0_Z, %KC0_129_W, %T0_W * %T0_X = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_X, 0, 0, 2052, %PV_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Y = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Y, 0, 0, 2053, %T0_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Z = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Z, 0, 0, 2054, %T0_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_W = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_W, 0, 0, 2055, %T0_W, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T0_X, %T3_Y, %T2_Z, %T2_W, %T1_Z, %KC0_130_X, %T0_X, %PRED_SEL_OFF, %KC0_130_Y, %T0_Y, %KC0_130_Z, %T0_Z, %KC0_130_W, %T0_W * %T0_X = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_X, 0, 0, 2056, %T0_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T3_Y = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Y, 0, 0, 2057, %T0_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Z = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Z, 0, 0, 2058, %T0_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_W = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_W, 0, 0, 2059, %T0_W, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T0_X, %T0_XYZW, %T0_Y, %T0_Z, %T0_W, %T1_Y, %T1_W, %KC0_131_X, %T0_X, %PRED_SEL_OFF, %KC0_134_W * %T0_X = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_X, 0, 0, 2060, %T0_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW * %T1_Y = MOV 1, 0, 0, 0, %KC0_134_W, 0, 0, 0, 2075, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T3_X, %T2_Y, %KC0_138_X, %KC0_134_X, %PRED_SEL_OFF, %KC0_138_Y, %KC0_134_Y * %T3_X = ADD 0, 0, 1, 0, 0, 0, %KC0_138_X, 0, 0, 0, 2088, %KC0_134_X, 0, 0, 0, 2072, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Y = ADD 0, 0, 1, 0, 0, 0, %KC0_138_Y, 0, 0, 0, 2089, %KC0_134_Y, 0, 0, 0, 2073, 1, pred:%PRED_SEL_OFF, 0, 0 %T4_X = ADD 0, 0, 1, 0, 0, 0, %KC0_138_Z, 0, 0, 0, 2090, %KC0_134_Z, 0, 0, 0, 2074, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T2_X, %T0_Y, %T0_XYZW, %T0_X, %T0_Z, %T0_W, %KC0_133_X, %PRED_SEL_OFF, %T1_W, %KC0_131_Y, %T3_Y, %T0_XYZW * %T2_X = MOV 1, 0, 0, 0, %KC0_133_X, 0, 0, 0, 2068, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Y = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Y, 0, 0, 2061, %T3_Y, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW BUNDLE %T5_X, %T5_XYZW, %T5_Y, %T5_Z, %T5_W, %T7_Z, %T7_XYZW, %T7_X, %T7_Y, %T7_W, %KC0_137_X, %PRED_SEL_OFF, %KC0_135_Z * %T5_X = MOV 1, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, 0, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW * %T7_Z = MOV 1, 0, 0, 0, %KC0_135_Z, 0, 0, 0, 2078, 1, pred:%PRED_SEL_OFF, 0, 0, %T7_XYZW BUNDLE %T5_Y, %T5_XYZW, %T5_X, %T5_Z, %T5_W, %T6_Z, %T6_XYZW, %T6_X, %T6_Y, %T6_W, %KC0_137_Y, %PRED_SEL_OFF, %T5_XYZW, %KC0_132_Z * %T5_Y = MOV 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, 0, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW, %T5_XYZW * %T6_Z = MOV 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW BUNDLE %T7_Y, %T7_XYZW, %T7_X, %T7_Z, %T7_W, %T5_Z, %T5_XYZW, %T5_X, %T5_Y, %T5_W, %KC0_135_Y, %PRED_SEL_OFF, %T7_XYZW, %KC0_137_Z, %T5_XYZW * %T7_Y = MOV 1, 0, 0, 0, %KC0_135_Y, 0, 0, 0, 2077, 0, pred:%PRED_SEL_OFF, 0, 0, %T7_XYZW, %T7_XYZW * %T5_Z = MOV 1, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW, %T5_XYZW BUNDLE %T7_X, %T7_XYZW, %T7_Y, %T7_Z, %T7_W, %T6_Y, %T6_XYZW, %T6_X, %T6_Z, %T6_W, %KC0_135_X, %PRED_SEL_OFF, %T7_XYZW, %KC0_132_Y, %T6_XYZW * %T7_X = MOV 1, 0, 0, 0, %KC0_135_X, 0, 0, 0, 2076, 0, pred:%PRED_SEL_OFF, 0, 0, %T7_XYZW, %T7_XYZW * %T6_Y = MOV 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW, %T6_XYZW BUNDLE %T6_X, %T6_XYZW, %T6_Y, %T6_Z, %T6_W, %T0_W, %T0_XYZW, %T0_X, %T0_Y, %T0_Z, %KC0_132_X, %PRED_SEL_OFF, %T6_XYZW, %T1_W, %KC0_131_W, %T2_W, %T0_XYZW * %T6_X = MOV 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, 0, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW, %T6_XYZW * %T0_W = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_W, 0, 0, 2063, %T2_W, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW BUNDLE %T0_Z, %T0_XYZW, %T0_X, %T0_Y, %T0_W, %T6_W, %T6_XYZW, %T6_X, %T6_Y, %T6_Z, %T1_W, %KC0_131_Z, %T2_Z, %PRED_SEL_OFF, %T0_XYZW, %ALU_LITERAL_X, %T6_XYZW * %T0_Z = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Z, 0, 0, 2062, %T2_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW * %T6_W = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW, %T6_XYZW BUNDLE %T1_X, %T1_Y, %T1_Z, %T1_W, %T6_X, %PRED_SEL_OFF, %T6_Y, %T6_Z, %T6_W * %T1_X = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T6_X, 0, 0, 0, -1, %T6_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Y, 0, 0, 0, -1, %T6_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Z, 0, 0, 0, -1, %T6_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_W, 0, 0, 0, -1, %T6_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T1_X = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %T1_X, 0, 0, 1, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T6_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Z, 0, 0, 0, 2066, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW, %T6_XYZW %T6_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_Y, 0, 0, 0, 2065, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW, %T6_XYZW %T6_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, %T1_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW, %T6_XYZW %T7_W = MOV 1, 0, 0, 0, %T6_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T7_XYZW BUNDLE %T1_X, %T1_Y, %T1_Z, %T1_W, %T6_X, %T7_X, %PRED_SEL_OFF, %T6_Y, %T7_Y, %T6_Z, %T7_Z, %T6_W, %T7_W * %T1_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_X, 0, 0, 0, -1, %T7_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Y, 0, 0, 0, -1, %T7_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Z, 0, 0, 0, -1, %T7_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_W = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T6_W, 0, 0, 0, -1, %T7_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T7_X, %T5_W, %T5_XYZW, %T5_X, %T5_Y, %T5_Z, %PV_W, %ZERO, %PRED_SEL_OFF, %T6_W * %T7_X = CNDGE_r600 0, 0, %PV_W, 0, 0, -1, %PV_W, 0, 0, -1, %ZERO, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T5_W = MOV 1, 0, 0, 0, %T6_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T5_XYZW BUNDLE %T2_X, %T2_Y, %T2_Z, %T2_W, %T6_X, %T5_X, %PRED_SEL_OFF, %T6_Y, %T5_Y, %T6_Z, %T5_Z, %T6_W, %T5_W * %T2_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_X, 0, 0, 0, -1, %T5_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_Y, 0, 0, 0, -1, %T5_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Z = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T6_Z, 0, 0, 0, -1, %T5_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T6_W, 0, 0, 0, -1, %T5_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T1_X, %T2_Y, %T1_Z, %T2_W, %T7_X, %KC0_139_Y, %T2_Y, %PRED_SEL_OFF, %KC0_139_Z, %T4_X, %KC0_139_X, %T3_X, %T2_Z, %ZERO * %T1_X = MULADD_IEEE_r600 0, 0, %T7_X, 0, 0, -1, %KC0_139_Y, 0, 0, 2093, %T2_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Y = MULADD_IEEE_r600 0, 0, %T7_X, 0, 0, -1, %KC0_139_Z, 0, 0, 2094, %T4_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Z = MULADD_IEEE_r600 0, 0, %T7_X, 0, 0, -1, %KC0_139_X, 0, 0, 2092, %T3_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 1 * %T2_W = CNDGE_r600 0, 0, %T2_Z, 0, 0, -1, %T2_Z, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T3_X = LOG_IEEE_r600 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T2_X = MUL 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T2_X = EXP_IEEE_r600 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T2_X = CNDGE_r600 0, 0, %T1_W, 0, 0, -1, %T2_X, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T3_X, %T3_XYZW, %T3_Y, %T3_Z, %T3_W, %T2_X, %KC0_140_X, %T1_Z, %PRED_SEL_OFF, %KC0_140_Z, %T2_Y * %T3_X = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_140_X, 0, 0, 2096, %T1_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW * %T3_Z = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_140_Z, 0, 0, 2098, %T2_Y, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW, %T3_XYZW BUNDLE %T3_Y, %T3_XYZW, %T3_X, %T3_Z, %T3_W, %T2_X, %KC0_140_Y, %T1_X, %PRED_SEL_OFF, %T3_XYZW, %T1_Y * %T3_Y = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_140_Y, 0, 0, 2097, %T1_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW, %T3_XYZW * %T3_W = MOV 1, 0, 0, 1, %T1_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW, %T3_XYZW R600_ExportSwz %T0_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T3_XYZW, 2, 0, 0, 1, 2, 3, 40, 1 CF_END_R600 PAD # End machine code for function main. ===== SHADER #24 =========================================== VS/RV670/R600 ===== ===== 136 dw ===== 8 gprs ===== 1 stack ======================================== 0000 00000000 89800000 CALL_FS @0 0002 80000006 a0f40000 ALU 62 @12 KC0[CB0:0-31] 0012 00100001 00000210 1 x: MUL_IEEE R0.x, R1.x, KC0[0].x 0014 00900001 20000210 y: MUL_IEEE R0.y, R1.x, KC0[0].y 0016 01100001 40000210 z: MUL_IEEE R0.z, R1.x, KC0[0].z 0018 81900001 60000210 w: MUL_IEEE R0.w, R1.x, KC0[0].w 0020 00102401 000280fe 2 x: MULADD_IEEE R0.x, R1.y, KC0[1].x, PV.x 0022 00902401 200284fe y: MULADD_IEEE R0.y, R1.y, KC0[1].y, PV.y 0024 01102401 400288fe z: MULADD_IEEE R0.z, R1.y, KC0[1].z, PV.z 0026 81902401 60028cfe w: MULADD_IEEE R0.w, R1.y, KC0[1].w, PV.w 0028 00104801 000280fe 3 x: MULADD_IEEE R0.x, R1.z, KC0[2].x, PV.x 0030 00904801 206284fe y: MULADD_IEEE R3.y, R1.z, KC0[2].y, PV.y 0032 01104801 404288fe z: MULADD_IEEE R2.z, R1.z, KC0[2].z, PV.z 0034 81904801 60428cfe w: MULADD_IEEE R2.w, R1.z, KC0[2].w, PV.w 0036 00106c01 000280fe 4 x: MULADD_IEEE R0.x, R1.w, KC0[3].x, PV.x 0038 80000c86 20201910 y: MOV R1.y, KC0[6].w 0040 0010c08a 00600010 5 x: ADD R3.x, KC0[10].x, KC0[6].x 0042 8090c48a 20400010 y: ADD R2.y, KC0[10].y, KC0[6].y 0044 0110c88a 00800010 6 x: ADD R4.x, KC0[10].z, KC0[6].z 0046 00906c01 20028403 y: MULADD_IEEE R0.y, R1.w, KC0[3].y, R3.y 0048 80000085 00401910 t: MOV R2.x, KC0[5].x 0050 00000089 00a01910 7 x: MOV R5.x, KC0[9].x 0052 00000489 20a01910 y: MOV R5.y, KC0[9].y 0054 00000887 40e01910 z: MOV R7.z, KC0[7].z 0056 80000884 40c01910 t: MOV R6.z, KC0[4].z 0058 00000087 00e01910 8 x: MOV R7.x, KC0[7].x 0060 00000487 20e01910 y: MOV R7.y, KC0[7].y 0062 00000889 40a01910 z: MOV R5.z, KC0[9].z 0064 80000484 20c01910 t: MOV R6.y, KC0[4].y 0066 00000084 00c01910 9 x: MOV R6.x, KC0[4].x 0068 01106c01 40028802 z: MULADD_IEEE R0.z, R1.w, KC0[3].z, R2.z 0070 01906c01 60028c02 w: MULADD_IEEE R0.w, R1.w, KC0[3].w, R2.w 0072 800000f8 60c01910 t: MOV R6.w, 0 0074 001fc0fe 00205010 10 x: DOT4 R1.x, PV.x, PV.x 0076 0080c406 20205000 y: DOT4 __.y, R6.y, R6.y 0078 0100c806 40205000 z: DOT4 __.z, R6.z, R6.z 0080 801fe0ff 60205000 w: DOT4 __.w, PS, PS 0082 800000fe 00206711 11 t: RECIPSQRT_CLAMPED R1.x, |PV.x| 0084 001fe084 00c00210 12 x: MUL_IEEE R6.x, KC0[4].x, PS 0086 001fe484 20c00210 y: MUL_IEEE R6.y, KC0[4].y, PS 0088 001fe884 40c00210 z: MUL_IEEE R6.z, KC0[4].z, PS 0090 80000c06 60e01910 w: MOV R7.w, R6.w 0092 0000e0fe 00205000 13 x: DOT4 __.x, PV.x, R7.x 0094 0080e4fe 20205000 y: DOT4 __.y, PV.y, R7.y 0096 0100e8fe 40205000 z: DOT4 __.z, PV.z, R7.z 0098 819fcc06 60205010 w: DOT4 R1.w, R6.w, PV.w 0100 019fccfe 00e340f8 14 x: CNDGE R7.x, PV.w, PV.w, 0 0102 80000c06 60a01910 w: MOV R5.w, R6.w 0104 0000a006 00405000 15 x: DOT4 __.x, R6.x, R5.x 0106 0080a406 20405000 y: DOT4 __.y, R6.y, R5.y 0108 0100a806 40405010 z: DOT4 R2.z, R6.z, R5.z 0110 819fcc06 60405000 w: DOT4 __.w, R6.w, PV.w 0112 00916007 00228402 16 x: MULADD_IEEE R1.x, R7.x, KC0[11].y, R2.y 0114 01116007 20428004 y: MULADD_IEEE R2.y, R7.x, KC0[11].z, R4.x 0116 00116007 40268003 z: MULADD_IEEE R1.z, R7.x, KC0[11].x, R3.x VEC_021 0118 801fc0fe 604340f8 w: CNDGE R2.w, PV.x, PV.x, 0 0120 80000cfe 00606310 17 t: LOG_IEEE R3.x, PV.w 0122 801fe002 00400110 18 x: MUL R2.x, R2.x, PS 0124 800000fe 00406110 19 t: EXP_IEEE R2.x, PV.x 0126 801fec01 004340f8 20 x: CNDGE R2.x, R1.w, PS, 0 0128 001180fe 80628801 21 x: MULADD_IEEE_sat R3.x, PV.x, KC0[12].x, R1.z 0130 009180fe a0628001 y: MULADD_IEEE_sat R3.y, PV.x, KC0[12].y, R1.x 0132 011180fe c0628402 z: MULADD_IEEE_sat R3.z, PV.x, KC0[12].z, R2.y 0134 80000401 e0601910 w: MOV_sat R3.w, R1.y 0004 c000203c 94400688 EXPORT_DONE POS 60 R0.xyzw VPM 0006 c001c000 94600688 EXPORT_DONE PARAM 0 R3.xyzw VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_FLOAT, } ===== SHADER #25 ======================================== FETCH/RV670/R600 ===== ===== 8 dw ===== 2 gprs ===== 0 stack ========================================== 0000 00000002 81000000 VTX 1 @4 0004 7c00a000 8c151001 00080000 VFETCH R1.xyz1, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:48 NUM:0 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== -------------------------------------------------------------- FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 0) %1 = call float @llvm.R600.load.input(i32 1) %2 = call float @llvm.R600.load.input(i32 2) %3 = call float @llvm.R600.load.input(i32 3) %4 = insertelement <4 x float> undef, float %0, i32 0 %5 = insertelement <4 x float> %4, float %1, i32 1 %6 = insertelement <4 x float> %5, float %2, i32 2 %7 = insertelement <4 x float> %6, float %3, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %7, i32 0, i32 0) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="0" } attributes #1 = { readnone } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T0_W in %vreg0, %T0_Z in %vreg1, %T0_Y in %vreg2, %T0_X in %vreg3 BB#0: derived from LLVM BB %main_body Live Ins: %T0_W %T0_Z %T0_Y %T0_X R600_ExportSwz %T0_XYZW, 0, 0, 0, 1, 2, 3, 40, 1 CF_END_R600 # End machine code for function main. ===== SHADER #26 =========================================== PS/RV670/R600 ===== ===== 4 dw ===== 1 gprs ===== 1 stack ========================================== 0000 c0000000 94600688 EXPORT_DONE PIXEL 0 R0.xyzw VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..11] DCL TEMP[0..3] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: DP3 TEMP[1].x, IN[1], IN[1] 5: RSQ TEMP[1].x, TEMP[1] 6: MUL TEMP[0], IN[1], TEMP[1].xxxx 7: MOV TEMP[2].w, CONST[4].xxxx 8: MOV TEMP[3], CONST[5] 9: MOV_SAT OUT[1], TEMP[3] 10: DP3 TEMP[2].x, TEMP[0], CONST[6] 11: DP3 TEMP[2].y, TEMP[0], CONST[8] 12: LIT TEMP[1], TEMP[2] 13: ADD TEMP[3], CONST[9], TEMP[3] 14: MAD TEMP[3], TEMP[1].yyyy, CONST[10], TEMP[3] 15: MAD_SAT OUT[1].xyz, TEMP[1].zzzz, CONST[11], TEMP[3] 16: END ; ModuleID = 'tgsi' define void @main() #0 { main_body: %0 = call float @llvm.R600.load.input(i32 4) %1 = call float @llvm.R600.load.input(i32 5) %2 = call float @llvm.R600.load.input(i32 6) %3 = call float @llvm.R600.load.input(i32 7) %4 = call float @llvm.R600.load.input(i32 8) %5 = call float @llvm.R600.load.input(i32 9) %6 = call float @llvm.R600.load.input(i32 10) %7 = call float @llvm.R600.load.input(i32 11) %8 = load <4 x float> addrspace(8)* null %9 = extractelement <4 x float> %8, i32 0 %10 = fmul float %0, %9 %11 = load <4 x float> addrspace(8)* null %12 = extractelement <4 x float> %11, i32 1 %13 = fmul float %0, %12 %14 = load <4 x float> addrspace(8)* null %15 = extractelement <4 x float> %14, i32 2 %16 = fmul float %0, %15 %17 = load <4 x float> addrspace(8)* null %18 = extractelement <4 x float> %17, i32 3 %19 = fmul float %0, %18 %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %21 = extractelement <4 x float> %20, i32 0 %22 = fmul float %1, %21 %23 = fadd float %22, %10 %24 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %25 = extractelement <4 x float> %24, i32 1 %26 = fmul float %1, %25 %27 = fadd float %26, %13 %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %29 = extractelement <4 x float> %28, i32 2 %30 = fmul float %1, %29 %31 = fadd float %30, %16 %32 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) %33 = extractelement <4 x float> %32, i32 3 %34 = fmul float %1, %33 %35 = fadd float %34, %19 %36 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %37 = extractelement <4 x float> %36, i32 0 %38 = fmul float %2, %37 %39 = fadd float %38, %23 %40 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %41 = extractelement <4 x float> %40, i32 1 %42 = fmul float %2, %41 %43 = fadd float %42, %27 %44 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %45 = extractelement <4 x float> %44, i32 2 %46 = fmul float %2, %45 %47 = fadd float %46, %31 %48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) %49 = extractelement <4 x float> %48, i32 3 %50 = fmul float %2, %49 %51 = fadd float %50, %35 %52 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %53 = extractelement <4 x float> %52, i32 0 %54 = fmul float %3, %53 %55 = fadd float %54, %39 %56 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %57 = extractelement <4 x float> %56, i32 1 %58 = fmul float %3, %57 %59 = fadd float %58, %43 %60 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %61 = extractelement <4 x float> %60, i32 2 %62 = fmul float %3, %61 %63 = fadd float %62, %47 %64 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) %65 = extractelement <4 x float> %64, i32 3 %66 = fmul float %3, %65 %67 = fadd float %66, %51 %68 = insertelement <4 x float> undef, float %4, i32 0 %69 = insertelement <4 x float> %68, float %5, i32 1 %70 = insertelement <4 x float> %69, float %6, i32 2 %71 = insertelement <4 x float> %70, float 0.000000e+00, i32 3 %72 = insertelement <4 x float> undef, float %4, i32 0 %73 = insertelement <4 x float> %72, float %5, i32 1 %74 = insertelement <4 x float> %73, float %6, i32 2 %75 = insertelement <4 x float> %74, float 0.000000e+00, i32 3 %76 = call float @llvm.AMDGPU.dp4(<4 x float> %71, <4 x float> %75) %77 = call float @fabs(float %76) %78 = call float @llvm.AMDGPU.rsq(float %77) %79 = fmul float %4, %78 %80 = fmul float %5, %78 %81 = fmul float %6, %78 %82 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) %83 = extractelement <4 x float> %82, i32 0 %84 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %85 = extractelement <4 x float> %84, i32 0 %86 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %87 = extractelement <4 x float> %86, i32 1 %88 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %89 = extractelement <4 x float> %88, i32 2 %90 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) %91 = extractelement <4 x float> %90, i32 3 %92 = call float @llvm.AMDIL.clamp.(float %85, float 0.000000e+00, float 1.000000e+00) %93 = call float @llvm.AMDIL.clamp.(float %87, float 0.000000e+00, float 1.000000e+00) %94 = call float @llvm.AMDIL.clamp.(float %89, float 0.000000e+00, float 1.000000e+00) %95 = call float @llvm.AMDIL.clamp.(float %91, float 0.000000e+00, float 1.000000e+00) %96 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %97 = extractelement <4 x float> %96, i32 0 %98 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %99 = extractelement <4 x float> %98, i32 1 %100 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) %101 = extractelement <4 x float> %100, i32 2 %102 = insertelement <4 x float> undef, float %79, i32 0 %103 = insertelement <4 x float> %102, float %80, i32 1 %104 = insertelement <4 x float> %103, float %81, i32 2 %105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3 %106 = insertelement <4 x float> undef, float %97, i32 0 %107 = insertelement <4 x float> %106, float %99, i32 1 %108 = insertelement <4 x float> %107, float %101, i32 2 %109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3 %110 = call float @llvm.AMDGPU.dp4(<4 x float> %105, <4 x float> %109) %111 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %112 = extractelement <4 x float> %111, i32 0 %113 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %114 = extractelement <4 x float> %113, i32 1 %115 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) %116 = extractelement <4 x float> %115, i32 2 %117 = insertelement <4 x float> undef, float %79, i32 0 %118 = insertelement <4 x float> %117, float %80, i32 1 %119 = insertelement <4 x float> %118, float %81, i32 2 %120 = insertelement <4 x float> %119, float 0.000000e+00, i32 3 %121 = insertelement <4 x float> undef, float %112, i32 0 %122 = insertelement <4 x float> %121, float %114, i32 1 %123 = insertelement <4 x float> %122, float %116, i32 2 %124 = insertelement <4 x float> %123, float 0.000000e+00, i32 3 %125 = call float @llvm.AMDGPU.dp4(<4 x float> %120, <4 x float> %124) %126 = fcmp uge float %110, 0.000000e+00 %127 = select i1 %126, float %110, float 0.000000e+00 %128 = fcmp uge float %125, 0.000000e+00 %129 = select i1 %128, float %125, float 0.000000e+00 %130 = call float @llvm.pow.f32(float %129, float %83) %131 = fcmp ult float %110, 0.000000e+00 %132 = select i1 %131, float 0.000000e+00, float %130 %133 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %134 = extractelement <4 x float> %133, i32 0 %135 = fadd float %134, %85 %136 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %137 = extractelement <4 x float> %136, i32 1 %138 = fadd float %137, %87 %139 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) %140 = extractelement <4 x float> %139, i32 2 %141 = fadd float %140, %89 %142 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %143 = extractelement <4 x float> %142, i32 0 %144 = fmul float %127, %143 %145 = fadd float %144, %135 %146 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %147 = extractelement <4 x float> %146, i32 1 %148 = fmul float %127, %147 %149 = fadd float %148, %138 %150 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) %151 = extractelement <4 x float> %150, i32 2 %152 = fmul float %127, %151 %153 = fadd float %152, %141 %154 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %155 = extractelement <4 x float> %154, i32 0 %156 = fmul float %132, %155 %157 = fadd float %156, %145 %158 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %159 = extractelement <4 x float> %158, i32 1 %160 = fmul float %132, %159 %161 = fadd float %160, %149 %162 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11) %163 = extractelement <4 x float> %162, i32 2 %164 = fmul float %132, %163 %165 = fadd float %164, %153 %166 = call float @llvm.AMDIL.clamp.(float %157, float 0.000000e+00, float 1.000000e+00) %167 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) %168 = call float @llvm.AMDIL.clamp.(float %165, float 0.000000e+00, float 1.000000e+00) %169 = insertelement <4 x float> undef, float %55, i32 0 %170 = insertelement <4 x float> %169, float %59, i32 1 %171 = insertelement <4 x float> %170, float %63, i32 2 %172 = insertelement <4 x float> %171, float %67, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %172, i32 60, i32 1) %173 = insertelement <4 x float> undef, float %166, i32 0 %174 = insertelement <4 x float> %173, float %167, i32 1 %175 = insertelement <4 x float> %174, float %168, i32 2 %176 = insertelement <4 x float> %175, float %95, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %176, i32 0, i32 2) ret void } ; Function Attrs: readnone declare float @llvm.R600.load.input(i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #1 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #3 declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) attributes #0 = { "ShaderType"="1" } attributes #1 = { readnone } attributes #2 = { readonly } attributes #3 = { nounwind readonly } # Machine code for function main: Post SSA, not tracking liveness Function Live Ins: %T2_Z in %vreg0, %T2_Y in %vreg1, %T2_X in %vreg2, %T1_W in %vreg3, %T1_Z in %vreg4, %T1_Y in %vreg5, %T1_X in %vreg6 BB#0: derived from LLVM BB %main_body Live Ins: %T2_Z %T2_Y %T2_X %T1_W %T1_Z %T1_Y %T1_X CF_CALL_FS_R600 CF_ALU 0, 0, 0, 2, 0, 0, 0, 60 BUNDLE %T0_X, %T0_Y, %T0_Z, %T2_W, %T2_XYZW, %T2_X, %T2_Y, %T2_Z, %T1_X, %KC0_128_X, %PRED_SEL_OFF, %KC0_128_Y, %KC0_128_Z, %ALU_LITERAL_X, %T2_XYZW * %T0_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_X, 0, 0, 0, 2048, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Y, 0, 0, 0, 2049, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_Z, 0, 0, 0, 2050, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_W = MOV 1, 0, 0, 0, %ALU_LITERAL_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW, %T2_XYZW BUNDLE %T0_X, %T0_Y, %T0_Z, %T0_W, %T2_X, %PRED_SEL_OFF, %T2_Y, %T2_Z, %T2_W * %T0_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T2_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T2_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_Z = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T2_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_W = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T3_X = RECIPSQRT_CLAMPED_r600 1, 0, 0, 0, %T0_W, 0, 0, 1, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T3_Y, %T3_Z, %T0_W, %T1_Y, %KC0_129_X, %T0_X, %PRED_SEL_OFF, %KC0_129_Y, %T0_Y, %KC0_129_Z, %T0_Z * %T3_Y = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_X, 0, 0, 2052, %T0_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T3_Z = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Y, 0, 0, 2053, %T0_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T0_W = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_Z, 0, 0, 2054, %T0_Z, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T0_X, %T3_Y, %T2_Z, %T2_XYZW, %T2_X, %T2_Y, %T2_W, %T3_W, %T1_Z, %KC0_130_X, %T3_Y, %PRED_SEL_OFF, %KC0_130_Y, %T3_Z, %T2_Z, %T3_X, %T2_XYZW, %KC0_130_Z, %T0_W * %T0_X = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_X, 0, 0, 2056, %T3_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T3_Y = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Y, 0, 0, 2057, %T3_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 2, %T2_XYZW, %T2_XYZW * %T3_W = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_Z, 0, 0, 2058, %T0_W, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T0_X, %T0_XYZW, %T0_Y, %T0_Z, %T0_W, %T2_Y, %T2_XYZW, %T2_X, %T2_Z, %T2_W, %T3_Z, %T1_W, %KC0_131_X, %T0_X, %PRED_SEL_OFF, %T2_Y, %T3_X, %T2_XYZW, %T1_X, %KC0_128_W * %T0_X = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_X, 0, 0, 2060, %T0_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW * %T2_Y = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW, %T2_XYZW * %T3_Z = MUL_IEEE 0, 0, 1, 0, 0, 0, %T1_X, 0, 0, 0, -1, %KC0_128_W, 0, 0, 0, 2051, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T2_X, %T2_XYZW, %T2_Y, %T2_Z, %T2_W, %T0_Y, %T0_XYZW, %T0_X, %T0_Z, %T0_W, %T3_Z, %T2_X, %T3_X, %PRED_SEL_OFF, %T2_XYZW, %T1_W, %KC0_131_Y, %T3_Y, %T0_XYZW, %T1_Y, %KC0_129_W, %T3_Z * %T2_X = MUL_IEEE 0, 0, 1, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T3_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T2_XYZW, %T2_XYZW * %T0_Y = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Y, 0, 0, 2061, %T3_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW * %T3_Z = MULADD_IEEE_r600 0, 0, %T1_Y, 0, 0, -1, %KC0_129_W, 0, 0, 2055, %T3_Z, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T4_X, %T5_Y, %T1_Z, %KC0_130_W, %T3_Z, %PRED_SEL_OFF, %KC0_132_X * %T4_X = MULADD_IEEE_r600 0, 0, %T1_Z, 0, 0, -1, %KC0_130_W, 0, 0, 2059, %T3_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T5_Y = MOV 1, 0, 0, 0, %KC0_132_X, 0, 0, 0, 2064, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T1_X, %T3_Y, %KC0_137_X, %KC0_133_X, %PRED_SEL_OFF, %KC0_137_Y, %KC0_133_Y * %T1_X = ADD 0, 0, 1, 0, 0, 0, %KC0_137_X, 0, 0, 0, 2084, %KC0_133_X, 0, 0, 0, 2068, 0, pred:%PRED_SEL_OFF, 0, 0 * %T3_Y = ADD 0, 0, 1, 0, 0, 0, %KC0_137_Y, 0, 0, 0, 2085, %KC0_133_Y, 0, 0, 0, 2069, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T3_X, %T1_Y, %KC0_137_Z, %KC0_133_Z, %PRED_SEL_OFF, %KC0_133_W * %T3_X = ADD 0, 0, 1, 0, 0, 0, %KC0_137_Z, 0, 0, 0, 2086, %KC0_133_Z, 0, 0, 0, 2070, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Y = MOV 1, 0, 0, 0, %KC0_133_W, 0, 0, 0, 2071, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T6_Z, %T6_XYZW, %T6_X, %T6_Y, %T6_W, %T0_W, %T0_XYZW, %T0_X, %T0_Y, %T0_Z, %KC0_136_Z, %PRED_SEL_OFF, %T1_W, %KC0_131_W, %T4_X, %T0_XYZW * %T6_Z = MOV 1, 0, 0, 0, %KC0_136_Z, 0, 0, 0, 2082, 0, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW * %T0_W = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_W, 0, 0, 2063, %T4_X, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW BUNDLE %T6_Y, %T6_XYZW, %T6_X, %T6_Z, %T6_W, %T0_Z, %T0_XYZW, %T0_X, %T0_Y, %T0_W, %KC0_136_Y, %PRED_SEL_OFF, %T6_XYZW, %T1_W, %KC0_131_Z, %T3_W, %T0_XYZW * %T6_Y = MOV 1, 0, 0, 0, %KC0_136_Y, 0, 0, 0, 2081, 0, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW, %T6_XYZW * %T0_Z = MULADD_IEEE_r600 0, 0, %T1_W, 0, 0, -1, %KC0_131_Z, 0, 0, 2062, %T3_W, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T0_XYZW, %T0_XYZW BUNDLE %T6_X, %T6_XYZW, %T6_Y, %T6_Z, %T6_W, %T4_Z, %T4_XYZW, %T4_X, %T4_Y, %T4_W, %KC0_136_X, %PRED_SEL_OFF, %T6_XYZW, %KC0_134_Z * %T6_X = MOV 1, 0, 0, 0, %KC0_136_X, 0, 0, 0, 2080, 0, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW, %T6_XYZW * %T4_Z = MOV 1, 0, 0, 0, %KC0_134_Z, 0, 0, 0, 2074, 1, pred:%PRED_SEL_OFF, 0, 0, %T4_XYZW BUNDLE %T4_Y, %T4_XYZW, %T4_X, %T4_Z, %T4_W, %T6_W, %T6_XYZW, %T6_X, %T6_Y, %T6_Z, %KC0_134_Y, %PRED_SEL_OFF, %T4_XYZW, %T2_W * %T4_Y = MOV 1, 0, 0, 0, %KC0_134_Y, 0, 0, 0, 2073, 0, pred:%PRED_SEL_OFF, 0, 0, %T4_XYZW, %T4_XYZW * %T6_W = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T6_XYZW BUNDLE %T1_X, %T1_Y, %T1_Z, %T1_W, %T2_X, %T6_X, %PRED_SEL_OFF, %T2_Y, %T6_Y, %T2_Z, %T6_Z, %T2_W, %T6_W * %T1_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T6_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T6_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Z = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T6_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T6_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T5_X = CNDGE_r600 0, 0, %PV_Z, 0, 0, -1, %PV_Z, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T5_X = LOG_IEEE_r600 1, 0, 0, 0, %T5_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T5_X = MUL 0, 0, 1, 0, 0, 0, %T5_Y, 0, 0, 0, -1, %T5_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 %T5_X = EXP_IEEE_r600 1, 0, 0, 0, %T5_X, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T4_X, %T4_XYZW, %T4_Y, %T4_Z, %T4_W, %KC0_134_X, %PRED_SEL_OFF, %T4_XYZW, %T2_W * %T4_X = MOV 1, 0, 0, 0, %KC0_134_X, 0, 0, 0, 2072, 0, pred:%PRED_SEL_OFF, 0, 0, %T4_XYZW, %T4_XYZW * %T4_W = MOV 1, 0, 0, 0, %T2_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T4_XYZW BUNDLE %T1_X, %T1_Y, %T1_Z, %T1_W, %T2_X, %T4_X, %PRED_SEL_OFF, %T2_Y, %T4_Y, %T2_Z, %T4_Z, %T2_W, %T4_W * %T1_X = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_X, 0, 0, 0, -1, %T4_X, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Y = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_Y, 0, 0, 0, -1, %T4_Y, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Z = DOT4_r600_real 0, 0, 1, 0, 0, 0, %T2_Z, 0, 0, 0, -1, %T4_Z, 0, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_W = DOT4_r600_real 0, 0, 0, 0, 0, 0, %T2_W, 0, 0, 0, -1, %T4_W, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T2_X, %T4_Y, %PV_Z, %T5_X, %ZERO, %PRED_SEL_OFF, %T1_Z * %T2_X = CNDGE_r600 0, 0, %PV_Z, 0, 0, -1, %T5_X, 0, 0, -1, %ZERO, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T4_Y = CNDGE_r600 0, 0, %T1_Z, 0, 0, -1, %T1_Z, 0, 0, -1, %ZERO, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0 BUNDLE %T4_X, %T2_Y, %T1_Z, %T4_Y, %KC0_138_Y, %T3_Y, %PRED_SEL_OFF, %KC0_138_Z, %T3_X, %KC0_138_X, %T1_X * %T4_X = MULADD_IEEE_r600 0, 0, %T4_Y, 0, 0, -1, %KC0_138_Y, 0, 0, 2089, %T3_Y, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T2_Y = MULADD_IEEE_r600 0, 0, %T4_Y, 0, 0, -1, %KC0_138_Z, 0, 0, 2090, %T3_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0 * %T1_Z = MULADD_IEEE_r600 0, 0, %T4_Y, 0, 0, -1, %KC0_138_X, 0, 0, 2088, %T1_X, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 1 BUNDLE %T3_X, %T3_XYZW, %T3_Y, %T3_Z, %T3_W, %T2_X, %KC0_139_X, %T1_Z, %PRED_SEL_OFF, %KC0_139_Z, %T2_Y * %T3_X = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_139_X, 0, 0, 2092, %T1_Z, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW * %T3_Z = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_139_Z, 0, 0, 2094, %T2_Y, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW, %T3_XYZW BUNDLE %T3_Y, %T3_XYZW, %T3_X, %T3_Z, %T3_W, %T2_X, %KC0_139_Y, %T4_X, %PRED_SEL_OFF, %T3_XYZW, %T1_Y * %T3_Y = MULADD_IEEE_r600 0, 1, %T2_X, 0, 0, -1, %KC0_139_Y, 0, 0, 2093, %T4_X, 0, 0, -1, 0, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW, %T3_XYZW * %T3_W = MOV 1, 0, 0, 1, %T1_Y, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0, 0, %T3_XYZW, %T3_XYZW R600_ExportSwz %T0_XYZW, 1, 60, 0, 1, 2, 3, 40, 0 R600_ExportSwz %T3_XYZW, 2, 0, 0, 1, 2, 3, 40, 1 CF_END_R600 PAD # End machine code for function main. ===== SHADER #27 =========================================== VS/RV670/R600 ===== ===== 130 dw ===== 7 gprs ===== 1 stack ======================================== 0000 00000000 89800000 CALL_FS @0 0002 80000006 a0e80000 ALU 59 @12 KC0[CB0:0-31] 0012 00100001 00000210 1 x: MUL_IEEE R0.x, R1.x, KC0[0].x 0014 00900001 20000210 y: MUL_IEEE R0.y, R1.x, KC0[0].y 0016 01100001 40000210 z: MUL_IEEE R0.z, R1.x, KC0[0].z 0018 800000f8 60401910 w: MOV R2.w, 0 0020 00004002 00005000 2 x: DOT4 __.x, R2.x, R2.x 0022 00804402 20005000 y: DOT4 __.y, R2.y, R2.y 0024 01004802 40005000 z: DOT4 __.z, R2.z, R2.z 0026 819fccfe 60005010 w: DOT4 R0.w, PV.w, PV.w 0028 00102401 20628000 3 y: MULADD_IEEE R3.y, R1.y, KC0[1].x, R0.x 0030 00902401 40628400 z: MULADD_IEEE R3.z, R1.y, KC0[1].y, R0.y 0032 01102401 60028800 w: MULADD_IEEE R0.w, R1.y, KC0[1].z, R0.z 0034 800000fe 00606711 t: RECIPSQRT_CLAMPED R3.x, |PV.x| 0036 00104801 000284fe 4 x: MULADD_IEEE R0.x, R1.z, KC0[2].x, PV.y 0038 00904801 206288fe y: MULADD_IEEE R3.y, R1.z, KC0[2].y, PV.z 0040 001fe802 40480210 z: MUL_IEEE R2.z, R2.z, PS VEC_120 0042 81104801 60628cfe w: MULADD_IEEE R3.w, R1.z, KC0[2].z, PV.w 0044 00106c01 000280fe 5 x: MULADD_IEEE R0.x, R1.w, KC0[3].x, PV.x 0046 00006402 20400210 y: MUL_IEEE R2.y, R2.y, R3.x 0048 81900001 40600210 z: MUL_IEEE R3.z, R1.x, KC0[0].w 0050 00006002 00400210 6 x: MUL_IEEE R2.x, R2.x, R3.x 0052 00906c01 20028403 y: MULADD_IEEE R0.y, R1.w, KC0[3].y, R3.y 0054 81902401 406288fe z: MULADD_IEEE R3.z, R1.y, KC0[1].w, PV.z 0056 01904801 008288fe 7 x: MULADD_IEEE R4.x, R1.z, KC0[2].w, PV.z 0058 80000084 20a01910 y: MOV R5.y, KC0[4].x 0060 0010a089 00200010 8 x: ADD R1.x, KC0[9].x, KC0[5].x 0062 8090a489 20600010 y: ADD R3.y, KC0[9].y, KC0[5].y 0064 0110a889 00600010 9 x: ADD R3.x, KC0[9].z, KC0[5].z 0066 80000c85 20201910 y: MOV R1.y, KC0[5].w 0068 00000488 20c01910 10 y: MOV R6.y, KC0[8].y 0070 00000888 40c01910 z: MOV R6.z, KC0[8].z 0072 01906c01 60028004 w: MULADD_IEEE R0.w, R1.w, KC0[3].w, R4.x 0074 81106c01 40068c03 t: MULADD_IEEE R0.z, R1.w, KC0[3].z, R3.w SCL_122 0076 00000088 00c01910 11 x: MOV R6.x, KC0[8].x 0078 00000486 20801910 y: MOV R4.y, KC0[6].y 0080 00000886 40801910 z: MOV R4.z, KC0[6].z 0082 80000c02 60c01910 w: MOV R6.w, R2.w 0084 001fc002 00205000 12 x: DOT4 __.x, R2.x, PV.x 0086 0080c402 20205000 y: DOT4 __.y, R2.y, R6.y 0088 0100c802 40205010 z: DOT4 R1.z, R2.z, R6.z 0090 019fcc02 60205000 w: DOT4 __.w, R2.w, PV.w 0092 811fc8fe 00a340f8 t: CNDGE R5.x, PV.z, PV.z, 0 0094 800000ff 00a06310 13 t: LOG_IEEE R5.x, PS 0096 801fe405 00a00110 14 x: MUL R5.x, R5.y, PS 0098 00000086 00801910 15 x: MOV R4.x, KC0[6].x 0100 00000c02 60801910 w: MOV R4.w, R2.w 0102 800000fe 00a06110 t: EXP_IEEE R5.x, PV.x 0104 001fc002 00205000 16 x: DOT4 __.x, R2.x, PV.x 0106 00808402 20205000 y: DOT4 __.y, R2.y, R4.y 0108 01008802 40205010 z: DOT4 R1.z, R2.z, R4.z 0110 819fcc02 60205000 w: DOT4 __.w, R2.w, PV.w 0112 0000a8fe 004340f8 17 x: CNDGE R2.x, PV.z, R5.x, 0 0114 801fc0fe 208340f8 y: CNDGE R4.y, PV.x, PV.x, 0 0116 009144fe 00828403 18 x: MULADD_IEEE R4.x, PV.y, KC0[10].y, R3.y 0118 011144fe 20428003 y: MULADD_IEEE R2.y, PV.y, KC0[10].z, R3.x 0120 801144fe 40268001 z: MULADD_IEEE R1.z, PV.y, KC0[10].x, R1.x VEC_021 0122 00116002 806288fe 19 x: MULADD_IEEE_sat R3.x, R2.x, KC0[11].x, PV.z 0124 00916002 a06280fe y: MULADD_IEEE_sat R3.y, R2.x, KC0[11].y, PV.x 0126 01116002 c06284fe z: MULADD_IEEE_sat R3.z, R2.x, KC0[11].z, PV.y 0128 80000401 e0601910 w: MOV_sat R3.w, R1.y 0004 c000203c 94400688 EXPORT_DONE POS 60 R0.xyzw VPM 0006 c001c000 94600688 EXPORT_DONE PARAM 0 R3.xyzw VPM EOP ===== SHADER_END =============================================================== -------------------------------------------------------------- Vertex elements state: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_FLOAT, } {src_offset = 12, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32_FLOAT, } ===== SHADER #28 ======================================== FETCH/RV670/R600 ===== ===== 12 dw ===== 3 gprs ===== 0 stack ========================================= 0000 00000002 81000400 VTX 2 @4 0004 7c00a000 8c151001 00080000 VFETCH R1.xyz1, R0.x, RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:48 NUM:0 COMP:0 MODE:1) 0008 7c00a000 8c151002 0008000c VFETCH R2.xyz1, R0.x + 12b , RID:160 VERTEX MFC:31 UCF:0 FMT(DTA:48 NUM:0 COMP:0 MODE:1) 0002 00000000 8a000000 RET @0 ===== SHADER_END =============================================================== 288 frames in 5.0 seconds = 57.500 FPS