FRAG 0: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" } Shader Disassembly: V_MOV_B32_e32 v0, 0 ; 7E000280 EXP 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_INTERP_MOV_F32 v2, P0, 1, 0, [m0] ; C80A0102 V_INTERP_MOV_F32 v3, P0, 0, 0, [m0] ; C80E0002 EXP 15, 0, 0, 1, 1, v3, v2, v1, v0 ; F800180F 00010203 S_ENDPGM ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_INTERP_MOV_F32 v2, P0, 1, 0, [m0] ; C80A0102 V_INTERP_MOV_F32 v3, P0, 0, 0, [m0] ; C80E0002 EXP 15, 0, 0, 1, 1, v3, v2, v1, v0 ; F800180F 00010203 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 %16 = extractelement <4 x float> %12, i32 3 %17 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 1 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6) %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = extractelement <4 x float> %19, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION 0: MOV OUT[0], IN[0] 1: END STREAMOUT 0: BUF0[0..0] <- OUT[0].x ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %13 = getelementptr [17 x <16 x i8>] addrspace(2)* %3, i64 0, i32 2 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %9) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = lshr i32 %6, 16 %23 = and i32 %22, 127 %24 = call i32 @llvm.SI.tid() %25 = icmp ult i32 %24, %23 br i1 %25, label %if-true-block, label %endif-block if-true-block: ; preds = %main_body %26 = add i32 %7, %24 %27 = mul i32 %8, 4 %28 = mul i32 %26, 4 %29 = add i32 %28, %27 %30 = bitcast float %18 to i32 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %14, i32 %30, i32 1, i32 %29, i32 0, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) br label %endif-block endif-block: ; preds = %main_body, %if-true-block call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare i32 @llvm.SI.tid() #2 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_LSHR_B32 s0, s11, 16 ; 9000900B V_MOV_B32_e32 v4, 127 ; 7E0802FF 0000007F V_AND_B32_e32 v5, s0, v4 ; 360A0800 V_MBCNT_LO_U32_B32_e64 v4, -1, 0, 0, 0, 0, 0 ; D2460004 020100C1 V_MBCNT_HI_U32_B32_e32 v4, -1, v4 ; 480808C1 V_CMP_LT_U32_e64 s[0:1], v4, v5, 0, 0, 0, 0 ; D1820000 02020B04 S_WAITCNT vmcnt(0) ; BF8C0770 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E V_ADD_I32_e32 v4, s12, v4 ; 4A08080C V_LSHLREV_B32_e32 v4, 2, v4 ; 34080882 S_LSHL_B32 s2, s13, 2 ; 8F02820D V_ADD_I32_e32 v4, s2, v4 ; 4A080802 S_LOAD_DWORDX4 s[4:7], s[6:7], 8 ; C0820708 S_WAITCNT lgkmcnt(0) ; BF8C007F TBUFFER_STORE_FORMAT_X v0, 0, -1, 0, -1, 0, 4, 4, v4, s[4:7], -1, 0, 0 ; EA245000 80410004 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 S_OR_B64 exec, exec, s[0:1] ; 88FE007E EXP 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL SV[0], INSTANCEID DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], LAYER 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: MOV OUT[2], SV[0] 3: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 %16 = extractelement <4 x float> %12, i32 3 %17 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 1 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6) %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = extractelement <4 x float> %19, i32 3 %24 = bitcast i32 %9 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 4, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float %24, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[4:7], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000400 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[4:7], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000400 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 0, 0, v4, v5, v6, v7 ; F80000CF 07060504 V_MOV_B32_e32 v0, 0.000000e+00 ; 7E000280 EXP 4, 13, 0, 1, 0, v0, v0, v3, v0 ; F80008D4 00030000 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], CONSTANT 0: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: V_MOV_B32_e32 v0, 0 ; 7E000280 EXP 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 S_ENDPGM ; BF810000 FRAG 0: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" } Shader Disassembly: V_MOV_B32_e32 v0, 0 ; 7E000280 EXP 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_INTERP_MOV_F32 v2, P0, 1, 0, [m0] ; C80A0102 V_INTERP_MOV_F32 v3, P0, 0, 0, [m0] ; C80E0002 EXP 15, 0, 0, 1, 1, v3, v2, v1, v0 ; F800180F 00010203 S_ENDPGM ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_INTERP_MOV_F32 v2, P0, 1, 0, [m0] ; C80A0102 V_INTERP_MOV_F32 v3, P0, 0, 0, [m0] ; C80E0002 EXP 15, 0, 0, 1, 1, v3, v2, v1, v0 ; F800180F 00010203 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 %16 = extractelement <4 x float> %12, i32 3 %17 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 1 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6) %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = extractelement <4 x float> %19, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION 0: MOV OUT[0], IN[0] 1: END STREAMOUT 0: BUF0[0..0] <- OUT[0].x ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %13 = getelementptr [17 x <16 x i8>] addrspace(2)* %3, i64 0, i32 2 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0 %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %9) %18 = extractelement <4 x float> %17, i32 0 %19 = extractelement <4 x float> %17, i32 1 %20 = extractelement <4 x float> %17, i32 2 %21 = extractelement <4 x float> %17, i32 3 %22 = lshr i32 %6, 16 %23 = and i32 %22, 127 %24 = call i32 @llvm.SI.tid() %25 = icmp ult i32 %24, %23 br i1 %25, label %if-true-block, label %endif-block if-true-block: ; preds = %main_body %26 = add i32 %7, %24 %27 = mul i32 %8, 4 %28 = mul i32 %26, 4 %29 = add i32 %28, %27 %30 = bitcast float %18 to i32 call void @llvm.SI.tbuffer.store.i32(<16 x i8> %14, i32 %30, i32 1, i32 %29, i32 0, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0) br label %endif-block endif-block: ; preds = %main_body, %if-true-block call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare i32 @llvm.SI.tid() #2 declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_LSHR_B32 s0, s11, 16 ; 9000900B V_MOV_B32_e32 v4, 127 ; 7E0802FF 0000007F V_AND_B32_e32 v5, s0, v4 ; 360A0800 V_MBCNT_LO_U32_B32_e64 v4, -1, 0, 0, 0, 0, 0 ; D2460004 020100C1 V_MBCNT_HI_U32_B32_e32 v4, -1, v4 ; 480808C1 V_CMP_LT_U32_e64 s[0:1], v4, v5, 0, 0, 0, 0 ; D1820000 02020B04 S_WAITCNT vmcnt(0) ; BF8C0770 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E V_ADD_I32_e32 v4, s12, v4 ; 4A08080C V_LSHLREV_B32_e32 v4, 2, v4 ; 34080882 S_LSHL_B32 s2, s13, 2 ; 8F02820D V_ADD_I32_e32 v4, s2, v4 ; 4A080802 S_LOAD_DWORDX4 s[4:7], s[6:7], 8 ; C0820708 S_WAITCNT lgkmcnt(0) ; BF8C007F TBUFFER_STORE_FORMAT_X v0, 0, -1, 0, -1, 0, 4, 4, v4, s[4:7], -1, 0, 0 ; EA245000 80410004 S_WAITCNT vmcnt(0) expcnt(0) ; BF8C0700 S_OR_B64 exec, exec, s[0:1] ; 88FE007E EXP 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL SV[0], INSTANCEID DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] DCL OUT[2], LAYER 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: MOV OUT[2], SV[0] 3: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 %16 = extractelement <4 x float> %12, i32 3 %17 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 1 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6) %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = extractelement <4 x float> %19, i32 3 %24 = bitcast i32 %9 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 12, i32 0, float %13, float %14, float %15, float %16) call void @llvm.SI.export(i32 4, i32 0, i32 1, i32 13, i32 0, float 0.000000e+00, float 0.000000e+00, float %24, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[4:7], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000400 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504 S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[4:7], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000400 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 0, 0, v4, v5, v6, v7 ; F80000CF 07060504 V_MOV_B32_e32 v0, 0.000000e+00 ; 7E000280 EXP 4, 13, 0, 1, 0, v0, v0, v3, v0 ; F80008D4 00030000 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], CONSTANT 0: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: V_MOV_B32_e32 v0, 0 ; 7E000280 EXP 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 S_ENDPGM ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], COLOR, COLOR DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 3, 0, [m0] ; C8080300 V_INTERP_P2_F32 v2, [v2], v1, 3, 0, [m0] ; C8090301 V_INTERP_P1_F32 v3, v0, 2, 0, [m0] ; C80C0200 V_INTERP_P2_F32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 V_INTERP_P1_F32 v4, v0, 1, 0, [m0] ; C8100100 V_INTERP_P2_F32 v4, [v4], v1, 1, 0, [m0] ; C8110101 V_INTERP_P1_F32 v5, v0, 0, 0, [m0] ; C8140000 V_INTERP_P2_F32 v5, [v5], v1, 0, 0, [m0] ; C8150001 EXP 15, 0, 0, 1, 1, v5, v4, v3, v2 ; F800180F 02030405 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], COLOR DCL CONST[0..3] DCL TEMP[0] 0: MUL TEMP[0], IN[0].xxxx, CONST[0] 1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0] 3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0] 4: MOV OUT[1], IN[1] 5: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call float @llvm.SI.load.const(<16 x i8> %11, i32 0) %13 = call float @llvm.SI.load.const(<16 x i8> %11, i32 4) %14 = call float @llvm.SI.load.const(<16 x i8> %11, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %11, i32 12) %16 = call float @llvm.SI.load.const(<16 x i8> %11, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %11, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %11, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %11, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %11, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %11, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %11, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %11, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %11, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %11, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %11, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %11, i32 60) %28 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0 %30 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %29, i32 0, i32 %6) %31 = extractelement <4 x float> %30, i32 0 %32 = extractelement <4 x float> %30, i32 1 %33 = extractelement <4 x float> %30, i32 2 %34 = extractelement <4 x float> %30, i32 3 %35 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 1 %36 = load <16 x i8> addrspace(2)* %35, !tbaa !0 %37 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %6) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = extractelement <4 x float> %37, i32 2 %41 = extractelement <4 x float> %37, i32 3 %42 = fmul float %31, %12 %43 = fmul float %31, %13 %44 = fmul float %31, %14 %45 = fmul float %31, %15 %46 = fmul float %32, %16 %47 = fadd float %46, %42 %48 = fmul float %32, %17 %49 = fadd float %48, %43 %50 = fmul float %32, %18 %51 = fadd float %50, %44 %52 = fmul float %32, %19 %53 = fadd float %52, %45 %54 = fmul float %33, %20 %55 = fadd float %54, %47 %56 = fmul float %33, %21 %57 = fadd float %56, %49 %58 = fmul float %33, %22 %59 = fadd float %58, %51 %60 = fmul float %33, %23 %61 = fadd float %60, %53 %62 = fmul float %34, %24 %63 = fadd float %62, %55 %64 = fmul float %34, %25 %65 = fadd float %64, %57 %66 = fmul float %34, %26 %67 = fadd float %66, %59 %68 = fmul float %34, %27 %69 = fadd float %68, %61 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %38, float %39, float %40, float %41) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %63, float %65, float %67, float %69) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[4:7], s[8:9], 4 ; C0820904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[4:7][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010100 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 S_LOAD_DWORDX4 s[4:7], s[8:9], 0 ; C0820900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[0:3], s[0:1], 0 ; C0800100 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s4, s[0:3], 3 ; C2020103 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s4 ; 7E080204 V_MUL_F32_e64 v4, v0, v4, 0, 0, 0, 0 ; D2100004 02020900 S_BUFFER_LOAD_DWORD s4, s[0:3], 7 ; C2020107 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v1, v5, v4, 0, 0, 0, 0 ; D2820004 04120B01 S_BUFFER_LOAD_DWORD s4, s[0:3], 11 ; C202010B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v2, v5, v4, 0, 0, 0, 0 ; D2820004 04120B02 S_BUFFER_LOAD_DWORD s4, s[0:3], 15 ; C202010F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MAD_F32 v4, v3, v5, v4, 0, 0, 0, 0 ; D2820004 04120B03 S_BUFFER_LOAD_DWORD s4, s[0:3], 2 ; C2020102 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s4 ; 7E0A0204 V_MUL_F32_e64 v5, v0, v5, 0, 0, 0, 0 ; D2100005 02020B00 S_BUFFER_LOAD_DWORD s4, s[0:3], 6 ; C2020106 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v1, v6, v5, 0, 0, 0, 0 ; D2820005 04160D01 S_BUFFER_LOAD_DWORD s4, s[0:3], 10 ; C202010A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v2, v6, v5, 0, 0, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s4, s[0:3], 14 ; C202010E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MAD_F32 v5, v3, v6, v5, 0, 0, 0, 0 ; D2820005 04160D03 S_BUFFER_LOAD_DWORD s4, s[0:3], 1 ; C2020101 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v6, s4 ; 7E0C0204 V_MUL_F32_e64 v6, v0, v6, 0, 0, 0, 0 ; D2100006 02020D00 S_BUFFER_LOAD_DWORD s4, s[0:3], 5 ; C2020105 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v6, v1, v7, v6, 0, 0, 0, 0 ; D2820006 041A0F01 S_BUFFER_LOAD_DWORD s4, s[0:3], 9 ; C2020109 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v6, v2, v7, v6, 0, 0, 0, 0 ; D2820006 041A0F02 S_BUFFER_LOAD_DWORD s4, s[0:3], 13 ; C202010D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MAD_F32 v6, v3, v7, v6, 0, 0, 0, 0 ; D2820006 041A0F03 S_BUFFER_LOAD_DWORD s4, s[0:3], 0 ; C2020100 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v7, s4 ; 7E0E0204 V_MUL_F32_e64 v7, v0, v7, 0, 0, 0, 0 ; D2100007 02020F00 S_BUFFER_LOAD_DWORD s4, s[0:3], 4 ; C2020104 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s4 ; 7E100204 V_MAD_F32 v7, v1, v8, v7, 0, 0, 0, 0 ; D2820007 041E1101 S_BUFFER_LOAD_DWORD s4, s[0:3], 8 ; C2020108 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s4 ; 7E100204 V_MAD_F32 v7, v2, v8, v7, 0, 0, 0, 0 ; D2820007 041E1102 S_BUFFER_LOAD_DWORD s0, s[0:3], 12 ; C200010C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v8, s0 ; 7E100200 V_MAD_F32 v0, v3, v8, v7, 0, 0, 0, 0 ; D2820000 041E1103 EXP 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 S_ENDPGM ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_INTERP_MOV_F32 v1, P0, 1, 0, [m0] ; C8060102 V_INTERP_MOV_F32 v2, P0, 0, 0, [m0] ; C80A0002 V_CVT_PKRTZ_F16_F32_e32 v1, v2, v1 ; 5E020302 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %23 = load <32 x i8> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %25 = load <16 x i8> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0 ; C0800300 S_LOAD_DWORDX8 s[4:11], s[4:5], 0 ; C0C20500 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_PKRTZ_F16_F32_e32 v4, v2, v3 ; 5E080702 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v1 ; 5E000300 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %23 = load <32 x i8> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %25 = load <16 x i8> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0 ; C0800300 S_LOAD_DWORDX8 s[4:11], s[4:5], 0 ; C0C20500 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_PKRTZ_F16_F32_e32 v4, v2, v3 ; 5E080702 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v1 ; 5E000300 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %23 = load <32 x i8> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %25 = load <16 x i8> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call i32 @llvm.SI.packf16(float %33, float %34) %38 = bitcast i32 %37 to float %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v3, v0, 1, 0, [m0] ; C80C0100 V_INTERP_P2_F32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 S_LOAD_DWORDX4 s[0:3], s[2:3], 0 ; C0800300 S_LOAD_DWORDX8 s[4:11], s[4:5], 0 ; C0C20500 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_PKRTZ_F16_F32_e32 v4, v2, v3 ; 5E080702 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v1 ; 5E000300 EXP 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 %16 = extractelement <4 x float> %12, i32 3 %17 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 1 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %6) %20 = extractelement <4 x float> %19, i32 0 %21 = extractelement <4 x float> %19, i32 1 %22 = extractelement <4 x float> %19, i32 2 %23 = extractelement <4 x float> %19, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %20, float %21, float %22, float %23) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float %16) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 4 ; C0800904 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[1:4], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000100 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT expcnt(0) lgkmcnt(0) ; BF8C000F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[20], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL CONST[3..5] DCL CONST[1][0..96] DCL CONST[2][0..68] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] UINT32 {0, 256, 272, 288} IMM[1] INT32 {16, 17, 18, 19} IMM[2] UINT32 {304, 192, 208, 224} IMM[3] FLT32 { -0.8000, 0.5000, 2.0000, 1.0000} IMM[4] INT32 {12, 13, 14, 15} IMM[5] UINT32 {240, 0, 0, 0} IMM[6] FLT32 { -2.0000, 0.0000, 0.0000, 0.0000} 0: UARL ADDR[0].x, IMM[1].xxxx 1: UARL ADDR[0].x, IMM[1].xxxx 2: MOV TEMP[0], CONST[1][ADDR[0].x] 3: UARL ADDR[0].x, IMM[1].yyyy 4: UARL ADDR[0].x, IMM[1].yyyy 5: MOV TEMP[1], CONST[1][ADDR[0].x] 6: UARL ADDR[0].x, IMM[1].zzzz 7: UARL ADDR[0].x, IMM[1].zzzz 8: MOV TEMP[2], CONST[1][ADDR[0].x] 9: UARL ADDR[0].x, IMM[1].wwww 10: UARL ADDR[0].x, IMM[1].wwww 11: MOV TEMP[3], CONST[1][ADDR[0].x] 12: MUL TEMP[4], TEMP[0], IN[0].xxxx 13: MAD TEMP[4], TEMP[1], IN[0].yyyy, TEMP[4] 14: MAD TEMP[4], TEMP[2], IMM[3].xxxx, TEMP[4] 15: ADD TEMP[4], TEMP[4], TEMP[3] 16: MUL TEMP[0], TEMP[0], IN[0].xxxx 17: MAD TEMP[0], TEMP[1], IN[0].yyyy, TEMP[0] 18: MAD TEMP[0], TEMP[2], IMM[3].yyyy, TEMP[0] 19: ADD TEMP[0], TEMP[0], TEMP[3] 20: RCP TEMP[1].x, TEMP[4].wwww 21: MUL TEMP[1], TEMP[4], TEMP[1].xxxx 22: RCP TEMP[2].x, TEMP[0].wwww 23: MUL TEMP[0], TEMP[0], TEMP[2].xxxx 24: UARL ADDR[0].x, IMM[4].xxxx 25: UARL ADDR[0].x, IMM[4].xxxx 26: MOV TEMP[2], CONST[1][ADDR[0].x] 27: UARL ADDR[0].x, IMM[4].yyyy 28: UARL ADDR[0].x, IMM[4].yyyy 29: MOV TEMP[3], CONST[1][ADDR[0].x] 30: UARL ADDR[0].x, IMM[4].zzzz 31: UARL ADDR[0].x, IMM[4].zzzz 32: MOV TEMP[4], CONST[1][ADDR[0].x] 33: UARL ADDR[0].x, IMM[4].wwww 34: UARL ADDR[0].x, IMM[4].wwww 35: MOV TEMP[5], CONST[1][ADDR[0].x] 36: MUL TEMP[6], TEMP[2], TEMP[0].xxxx 37: MAD TEMP[6], TEMP[3], TEMP[0].yyyy, TEMP[6] 38: MAD TEMP[6], TEMP[4], TEMP[0].zzzz, TEMP[6] 39: MAD TEMP[0], TEMP[5], TEMP[0].wwww, TEMP[6] 40: MUL TEMP[2], TEMP[2], TEMP[1].xxxx 41: MAD TEMP[2], TEMP[3], TEMP[1].yyyy, TEMP[2] 42: MAD TEMP[2], TEMP[4], TEMP[1].zzzz, TEMP[2] 43: MAD TEMP[1], TEMP[5], TEMP[1].wwww, TEMP[2] 44: ADD TEMP[0].xyz, TEMP[0], -TEMP[1] 45: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 46: RSQ TEMP[1].x, TEMP[1].xxxx 47: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 48: MUL TEMP[1].x, CONST[3].xxxx, CONST[3].xxxx 49: MUL TEMP[2].x, IMM[3].zzzz, CONST[3].xxxx 50: MOV TEMP[3].xyz, TEMP[0].xyzz 51: TEX TEMP[3], TEMP[3], SAMP[2], CUBE 52: ADD TEMP[4].x, TEMP[1].xxxx, -TEMP[2].xxxx 53: ADD TEMP[4].x, TEMP[4].xxxx, IMM[3].wwww 54: MUL TEMP[4].x, TEMP[4].xxxx, IMM[3].yyyy 55: MOV TEMP[5].xyz, TEMP[0].xyzz 56: TEX TEMP[5], TEMP[5], SAMP[1], CUBE 57: MUL TEMP[6].x, IMM[6].xxxx, CONST[3].xxxx 58: MAD TEMP[2].x, TEMP[6].xxxx, CONST[3].xxxx, TEMP[2].xxxx 59: ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].wwww 60: MUL TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy 61: MOV TEMP[0].xyz, TEMP[0].xyzz 62: TEX TEMP[0], TEMP[0], SAMP[0], CUBE 63: MUL TEMP[1].x, TEMP[1].xxxx, IMM[3].yyyy 64: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 65: MAD TEMP[0], TEMP[5], TEMP[2].xxxx, TEMP[0] 66: MAD TEMP[0].xyz, TEMP[3], TEMP[4].xxxx, TEMP[0] 67: MOV TEMP[0].xyz, TEMP[0].xyzx 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 1 %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %28 = load <32 x i8> addrspace(2)* %27, !tbaa !0 %29 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1 %32 = load <32 x i8> addrspace(2)* %31, !tbaa !0 %33 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1 %34 = load <16 x i8> addrspace(2)* %33, !tbaa !0 %35 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2 %36 = load <32 x i8> addrspace(2)* %35, !tbaa !0 %37 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = shl i32 16, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %41) %43 = shl i32 16, 4 %44 = add i32 %43, 4 %45 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %44) %46 = shl i32 16, 4 %47 = add i32 %46, 8 %48 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %47) %49 = shl i32 16, 4 %50 = add i32 %49, 12 %51 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %50) %52 = shl i32 17, 4 %53 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %52) %54 = shl i32 17, 4 %55 = add i32 %54, 4 %56 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %55) %57 = shl i32 17, 4 %58 = add i32 %57, 8 %59 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %58) %60 = shl i32 17, 4 %61 = add i32 %60, 12 %62 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %61) %63 = shl i32 18, 4 %64 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %63) %65 = shl i32 18, 4 %66 = add i32 %65, 4 %67 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %66) %68 = shl i32 18, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %69) %71 = shl i32 18, 4 %72 = add i32 %71, 12 %73 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %72) %74 = shl i32 19, 4 %75 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %74) %76 = shl i32 19, 4 %77 = add i32 %76, 4 %78 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %77) %79 = shl i32 19, 4 %80 = add i32 %79, 8 %81 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %80) %82 = shl i32 19, 4 %83 = add i32 %82, 12 %84 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %83) %85 = fmul float %42, %39 %86 = fmul float %45, %39 %87 = fmul float %48, %39 %88 = fmul float %51, %39 %89 = fmul float %53, %40 %90 = fadd float %89, %85 %91 = fmul float %56, %40 %92 = fadd float %91, %86 %93 = fmul float %59, %40 %94 = fadd float %93, %87 %95 = fmul float %62, %40 %96 = fadd float %95, %88 %97 = fmul float %64, 0xBFE99999A0000000 %98 = fadd float %97, %90 %99 = fmul float %67, 0xBFE99999A0000000 %100 = fadd float %99, %92 %101 = fmul float %70, 0xBFE99999A0000000 %102 = fadd float %101, %94 %103 = fmul float %73, 0xBFE99999A0000000 %104 = fadd float %103, %96 %105 = fadd float %98, %75 %106 = fadd float %100, %78 %107 = fadd float %102, %81 %108 = fadd float %104, %84 %109 = fmul float %42, %39 %110 = fmul float %45, %39 %111 = fmul float %48, %39 %112 = fmul float %51, %39 %113 = fmul float %53, %40 %114 = fadd float %113, %109 %115 = fmul float %56, %40 %116 = fadd float %115, %110 %117 = fmul float %59, %40 %118 = fadd float %117, %111 %119 = fmul float %62, %40 %120 = fadd float %119, %112 %121 = fmul float %64, 5.000000e-01 %122 = fadd float %121, %114 %123 = fmul float %67, 5.000000e-01 %124 = fadd float %123, %116 %125 = fmul float %70, 5.000000e-01 %126 = fadd float %125, %118 %127 = fmul float %73, 5.000000e-01 %128 = fadd float %127, %120 %129 = fadd float %122, %75 %130 = fadd float %124, %78 %131 = fadd float %126, %81 %132 = fadd float %128, %84 %133 = fdiv float 1.000000e+00, %108 %134 = fmul float %105, %133 %135 = fmul float %106, %133 %136 = fmul float %107, %133 %137 = fmul float %108, %133 %138 = fdiv float 1.000000e+00, %132 %139 = fmul float %129, %138 %140 = fmul float %130, %138 %141 = fmul float %131, %138 %142 = fmul float %132, %138 %143 = shl i32 12, 4 %144 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %143) %145 = shl i32 12, 4 %146 = add i32 %145, 4 %147 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %146) %148 = shl i32 12, 4 %149 = add i32 %148, 8 %150 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %149) %151 = shl i32 12, 4 %152 = add i32 %151, 12 %153 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %152) %154 = shl i32 13, 4 %155 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %154) %156 = shl i32 13, 4 %157 = add i32 %156, 4 %158 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %157) %159 = shl i32 13, 4 %160 = add i32 %159, 8 %161 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %160) %162 = shl i32 13, 4 %163 = add i32 %162, 12 %164 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %163) %165 = shl i32 14, 4 %166 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %165) %167 = shl i32 14, 4 %168 = add i32 %167, 4 %169 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %168) %170 = shl i32 14, 4 %171 = add i32 %170, 8 %172 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %171) %173 = shl i32 14, 4 %174 = add i32 %173, 12 %175 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %174) %176 = shl i32 15, 4 %177 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %176) %178 = shl i32 15, 4 %179 = add i32 %178, 4 %180 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %179) %181 = shl i32 15, 4 %182 = add i32 %181, 8 %183 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %182) %184 = shl i32 15, 4 %185 = add i32 %184, 12 %186 = call float @llvm.SI.load.const(<16 x i8> %26, i32 %185) %187 = fmul float %144, %139 %188 = fmul float %147, %139 %189 = fmul float %150, %139 %190 = fmul float %153, %139 %191 = fmul float %155, %140 %192 = fadd float %191, %187 %193 = fmul float %158, %140 %194 = fadd float %193, %188 %195 = fmul float %161, %140 %196 = fadd float %195, %189 %197 = fmul float %164, %140 %198 = fadd float %197, %190 %199 = fmul float %166, %141 %200 = fadd float %199, %192 %201 = fmul float %169, %141 %202 = fadd float %201, %194 %203 = fmul float %172, %141 %204 = fadd float %203, %196 %205 = fmul float %175, %141 %206 = fadd float %205, %198 %207 = fmul float %177, %142 %208 = fadd float %207, %200 %209 = fmul float %180, %142 %210 = fadd float %209, %202 %211 = fmul float %183, %142 %212 = fadd float %211, %204 %213 = fmul float %186, %142 %214 = fadd float %213, %206 %215 = fmul float %144, %134 %216 = fmul float %147, %134 %217 = fmul float %150, %134 %218 = fmul float %155, %135 %219 = fadd float %218, %215 %220 = fmul float %158, %135 %221 = fadd float %220, %216 %222 = fmul float %161, %135 %223 = fadd float %222, %217 %224 = fmul float %166, %136 %225 = fadd float %224, %219 %226 = fmul float %169, %136 %227 = fadd float %226, %221 %228 = fmul float %172, %136 %229 = fadd float %228, %223 %230 = fmul float %177, %137 %231 = fadd float %230, %225 %232 = fmul float %180, %137 %233 = fadd float %232, %227 %234 = fmul float %183, %137 %235 = fadd float %234, %229 %236 = fsub float -0.000000e+00, %231 %237 = fadd float %208, %236 %238 = fsub float -0.000000e+00, %233 %239 = fadd float %210, %238 %240 = fsub float -0.000000e+00, %235 %241 = fadd float %212, %240 %242 = fmul float %237, %237 %243 = fmul float %239, %239 %244 = fadd float %243, %242 %245 = fmul float %241, %241 %246 = fadd float %244, %245 %247 = call float @llvm.AMDGPU.rsq(float %246) %248 = fmul float %237, %247 %249 = fmul float %239, %247 %250 = fmul float %241, %247 %251 = fmul float %24, %24 %252 = fmul float 2.000000e+00, %24 %253 = insertelement <4 x float> undef, float %248, i32 0 %254 = insertelement <4 x float> %253, float %249, i32 1 %255 = insertelement <4 x float> %254, float %250, i32 2 %256 = insertelement <4 x float> %255, float %164, i32 3 %257 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %256) %258 = extractelement <4 x float> %257, i32 0 %259 = extractelement <4 x float> %257, i32 1 %260 = extractelement <4 x float> %257, i32 2 %261 = extractelement <4 x float> %257, i32 3 %262 = call float @fabs(float %260) %263 = fdiv float 1.000000e+00, %262 %264 = fmul float %258, %263 %265 = fadd float %264, 1.500000e+00 %266 = fmul float %259, %263 %267 = fadd float %266, 1.500000e+00 %268 = bitcast float %267 to i32 %269 = bitcast float %265 to i32 %270 = bitcast float %261 to i32 %271 = insertelement <4 x i32> undef, i32 %268, i32 0 %272 = insertelement <4 x i32> %271, i32 %269, i32 1 %273 = insertelement <4 x i32> %272, i32 %270, i32 2 %274 = insertelement <4 x i32> %273, i32 undef, i32 3 %275 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %274, <32 x i8> %36, <16 x i8> %38, i32 4) %276 = extractelement <4 x float> %275, i32 0 %277 = extractelement <4 x float> %275, i32 1 %278 = extractelement <4 x float> %275, i32 2 %279 = fsub float -0.000000e+00, %252 %280 = fadd float %251, %279 %281 = fadd float %280, 1.000000e+00 %282 = fmul float %281, 5.000000e-01 %283 = insertelement <4 x float> undef, float %248, i32 0 %284 = insertelement <4 x float> %283, float %249, i32 1 %285 = insertelement <4 x float> %284, float %250, i32 2 %286 = insertelement <4 x float> %285, float %186, i32 3 %287 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %286) %288 = extractelement <4 x float> %287, i32 0 %289 = extractelement <4 x float> %287, i32 1 %290 = extractelement <4 x float> %287, i32 2 %291 = extractelement <4 x float> %287, i32 3 %292 = call float @fabs(float %290) %293 = fdiv float 1.000000e+00, %292 %294 = fmul float %288, %293 %295 = fadd float %294, 1.500000e+00 %296 = fmul float %289, %293 %297 = fadd float %296, 1.500000e+00 %298 = bitcast float %297 to i32 %299 = bitcast float %295 to i32 %300 = bitcast float %291 to i32 %301 = insertelement <4 x i32> undef, i32 %298, i32 0 %302 = insertelement <4 x i32> %301, i32 %299, i32 1 %303 = insertelement <4 x i32> %302, i32 %300, i32 2 %304 = insertelement <4 x i32> %303, i32 undef, i32 3 %305 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %304, <32 x i8> %32, <16 x i8> %34, i32 4) %306 = extractelement <4 x float> %305, i32 0 %307 = extractelement <4 x float> %305, i32 1 %308 = extractelement <4 x float> %305, i32 2 %309 = extractelement <4 x float> %305, i32 3 %310 = fmul float -2.000000e+00, %24 %311 = fmul float %310, %24 %312 = fadd float %311, %252 %313 = fadd float %312, 1.000000e+00 %314 = fmul float %313, 5.000000e-01 %315 = insertelement <4 x float> undef, float %248, i32 0 %316 = insertelement <4 x float> %315, float %249, i32 1 %317 = insertelement <4 x float> %316, float %250, i32 2 %318 = insertelement <4 x float> %317, float %214, i32 3 %319 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %318) %320 = extractelement <4 x float> %319, i32 0 %321 = extractelement <4 x float> %319, i32 1 %322 = extractelement <4 x float> %319, i32 2 %323 = extractelement <4 x float> %319, i32 3 %324 = call float @fabs(float %322) %325 = fdiv float 1.000000e+00, %324 %326 = fmul float %320, %325 %327 = fadd float %326, 1.500000e+00 %328 = fmul float %321, %325 %329 = fadd float %328, 1.500000e+00 %330 = bitcast float %329 to i32 %331 = bitcast float %327 to i32 %332 = bitcast float %323 to i32 %333 = insertelement <4 x i32> undef, i32 %330, i32 0 %334 = insertelement <4 x i32> %333, i32 %331, i32 1 %335 = insertelement <4 x i32> %334, i32 %332, i32 2 %336 = insertelement <4 x i32> %335, i32 undef, i32 3 %337 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %336, <32 x i8> %28, <16 x i8> %30, i32 4) %338 = extractelement <4 x float> %337, i32 0 %339 = extractelement <4 x float> %337, i32 1 %340 = extractelement <4 x float> %337, i32 2 %341 = extractelement <4 x float> %337, i32 3 %342 = fmul float %251, 5.000000e-01 %343 = fmul float %338, %342 %344 = fmul float %339, %342 %345 = fmul float %340, %342 %346 = fmul float %341, %342 %347 = fmul float %306, %314 %348 = fadd float %347, %343 %349 = fmul float %307, %314 %350 = fadd float %349, %344 %351 = fmul float %308, %314 %352 = fadd float %351, %345 %353 = fmul float %309, %314 %354 = fadd float %353, %346 %355 = fmul float %276, %282 %356 = fadd float %355, %348 %357 = fmul float %277, %282 %358 = fadd float %357, %350 %359 = fmul float %278, %282 %360 = fadd float %359, %352 %361 = call i32 @llvm.SI.packf16(float %356, float %358) %362 = bitcast i32 %361 to float %363 = call i32 @llvm.SI.packf16(float %360, float %354) %364 = bitcast i32 %363 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %362, float %364, float %362, float %364) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #2 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 1, 0, [m0] ; C8080100 V_INTERP_P2_F32 v2, [v2], v1, 1, 0, [m0] ; C8090101 V_INTERP_P1_F32 v3, v0, 0, 0, [m0] ; C80C0000 V_INTERP_P2_F32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 S_LOAD_DWORDX4 s[8:11], s[0:1], 4 ; C0840104 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s6, s[8:11], 65 ; C2030941 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s6, v3 ; 10000606 S_BUFFER_LOAD_DWORD s6, s[8:11], 69 ; C2030945 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, s6, v2, v0, 0, 0, 0, 0 ; D2820000 04020406 S_BUFFER_LOAD_DWORD s6, s[8:11], 73 ; C2030949 V_MOV_B32_e32 v1, -8.000000e-01 ; 7E0202FF BF4CCCCD S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, s6, v1, v0, 0, 0, 0, 0 ; D2820004 04020206 S_BUFFER_LOAD_DWORD s7, s[8:11], 77 ; C203894D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s7, v4 ; 06080807 S_BUFFER_LOAD_DWORD s12, s[8:11], 67 ; C2060943 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s12, v3 ; 100A060C S_BUFFER_LOAD_DWORD s12, s[8:11], 71 ; C2060947 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, s12, v2, v5, 0, 0, 0, 0 ; D2820005 0416040C S_BUFFER_LOAD_DWORD s12, s[8:11], 75 ; C206094B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, s12, v1, v5, 0, 0, 0, 0 ; D2820006 0416020C S_BUFFER_LOAD_DWORD s13, s[8:11], 79 ; C206894F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v6, s13, v6 ; 060C0C0D V_RCP_F32_e32 v7, v6 ; 7E0E5506 V_MUL_F32_e32 v4, v4, v7 ; 10080F04 S_BUFFER_LOAD_DWORD s14, s[8:11], 64 ; C2070940 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s14, v3 ; 1010060E S_BUFFER_LOAD_DWORD s14, s[8:11], 68 ; C2070944 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v8, s14, v2, v8, 0, 0, 0, 0 ; D2820008 0422040E S_BUFFER_LOAD_DWORD s14, s[8:11], 72 ; C2070948 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v9, s14, v1, v8, 0, 0, 0, 0 ; D2820009 0422020E S_BUFFER_LOAD_DWORD s15, s[8:11], 76 ; C207894C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v9, s15, v9 ; 0612120F V_MUL_F32_e32 v9, v9, v7 ; 10120F09 S_BUFFER_LOAD_DWORD s16, s[8:11], 49 ; C2080931 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v10, s16, v9 ; 10141210 S_BUFFER_LOAD_DWORD s17, s[8:11], 53 ; C2088935 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v10, s17, v4, v10, 0, 0, 0, 0 ; D282000A 042A0811 S_BUFFER_LOAD_DWORD s18, s[8:11], 66 ; C2090942 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s18, v3 ; 10060612 S_BUFFER_LOAD_DWORD s18, s[8:11], 70 ; C2090946 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, s18, v2, v3, 0, 0, 0, 0 ; D2820002 040E0412 S_BUFFER_LOAD_DWORD s18, s[8:11], 74 ; C209094A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, s18, v1, v2, 0, 0, 0, 0 ; D2820001 040A0212 S_BUFFER_LOAD_DWORD s19, s[8:11], 78 ; C209894E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v1, s19, v1 ; 06020213 V_MUL_F32_e32 v1, v1, v7 ; 10020F01 S_BUFFER_LOAD_DWORD s20, s[8:11], 57 ; C20A0939 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s20, v1, v10, 0, 0, 0, 0 ; D2820003 042A0214 V_MUL_F32_e32 v6, v6, v7 ; 100C0F06 S_BUFFER_LOAD_DWORD s21, s[8:11], 61 ; C20A893D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s21, v6, v3, 0, 0, 0, 0 ; D2820003 040E0C15 V_MAD_F32 v0, s6, 5.000000e-01, v0, 0, 0, 0, 0 ; D2820000 0401E006 V_ADD_F32_e32 v0, s7, v0 ; 06000007 V_MAD_F32 v5, s12, 5.000000e-01, v5, 0, 0, 0, 0 ; D2820005 0415E00C V_ADD_F32_e32 v5, s13, v5 ; 060A0A0D V_RCP_F32_e32 v7, v5 ; 7E0E5505 V_MUL_F32_e32 v0, v0, v7 ; 10000F00 V_MAD_F32 v8, s14, 5.000000e-01, v8, 0, 0, 0, 0 ; D2820008 0421E00E V_ADD_F32_e32 v8, s15, v8 ; 0610100F V_MUL_F32_e32 v8, v8, v7 ; 10100F08 V_MUL_F32_e32 v10, s16, v8 ; 10141010 V_MAD_F32 v10, s17, v0, v10, 0, 0, 0, 0 ; D282000A 042A0011 V_MAD_F32 v2, s18, 5.000000e-01, v2, 0, 0, 0, 0 ; D2820002 0409E012 V_ADD_F32_e32 v2, s19, v2 ; 06040413 V_MUL_F32_e32 v2, v2, v7 ; 10040F02 V_MAD_F32 v10, s20, v2, v10, 0, 0, 0, 0 ; D282000A 042A0414 V_MUL_F32_e32 v5, v5, v7 ; 100A0F05 V_MAD_F32 v7, s21, v5, v10, 0, 0, 0, 0 ; D2820007 042A0A15 V_SUB_F32_e32 v3, v7, v3 ; 08060707 S_BUFFER_LOAD_DWORD s6, s[8:11], 48 ; C2030930 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s6, v9 ; 100E1206 S_BUFFER_LOAD_DWORD s7, s[8:11], 52 ; C2038934 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, s7, v4, v7, 0, 0, 0, 0 ; D2820007 041E0807 S_BUFFER_LOAD_DWORD s12, s[8:11], 56 ; C2060938 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, s12, v1, v7, 0, 0, 0, 0 ; D2820007 041E020C S_BUFFER_LOAD_DWORD s13, s[8:11], 60 ; C206893C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, s13, v6, v7, 0, 0, 0, 0 ; D2820007 041E0C0D V_MUL_F32_e32 v10, s6, v8 ; 10141006 V_MAD_F32 v10, s7, v0, v10, 0, 0, 0, 0 ; D282000A 042A0007 V_MAD_F32 v10, s12, v2, v10, 0, 0, 0, 0 ; D282000A 042A040C V_MAD_F32 v10, s13, v5, v10, 0, 0, 0, 0 ; D282000A 042A0A0D V_SUB_F32_e32 v7, v10, v7 ; 080E0F0A V_MUL_F32_e32 v10, v7, v7 ; 10140F07 V_MAD_F32 v10, v3, v3, v10, 0, 0, 0, 0 ; D282000A 042A0703 S_BUFFER_LOAD_DWORD s6, s[8:11], 50 ; C2030932 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v9, s6, v9 ; 10121206 S_BUFFER_LOAD_DWORD s7, s[8:11], 54 ; C2038936 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, s7, v4, v9, 0, 0, 0, 0 ; D2820004 04260807 S_BUFFER_LOAD_DWORD s12, s[8:11], 58 ; C206093A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, s12, v1, v4, 0, 0, 0, 0 ; D2820001 0412020C S_BUFFER_LOAD_DWORD s13, s[8:11], 62 ; C206893E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, s13, v6, v1, 0, 0, 0, 0 ; D2820001 04060C0D V_MUL_F32_e32 v4, s6, v8 ; 10081006 V_MAD_F32 v4, s7, v0, v4, 0, 0, 0, 0 ; D2820004 04120007 V_MAD_F32 v4, s12, v2, v4, 0, 0, 0, 0 ; D2820004 0412040C V_MAD_F32 v4, s13, v5, v4, 0, 0, 0, 0 ; D2820004 04120A0D V_SUB_F32_e32 v1, v4, v1 ; 08020304 V_MAD_F32 v4, v1, v1, v10, 0, 0, 0, 0 ; D2820004 042A0301 V_RSQ_LEGACY_F32_e32 v4, v4 ; 7E085B04 V_MUL_F32_e32 v11, v1, v4 ; 10160901 V_MUL_F32_e32 v10, v3, v4 ; 10140903 V_MUL_F32_e32 v9, v7, v4 ; 10120907 S_BUFFER_LOAD_DWORD s6, s[8:11], 51 ; C2030933 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s6, v8 ; 10021006 S_BUFFER_LOAD_DWORD s6, s[8:11], 55 ; C2030937 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, s6, v0, v1, 0, 0, 0, 0 ; D2820000 04060006 S_BUFFER_LOAD_DWORD s7, s[8:11], 59 ; C203893B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, s7, v2, v0, 0, 0, 0, 0 ; D2820000 04020407 S_BUFFER_LOAD_DWORD s7, s[8:11], 63 ; C203893F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v12, s7, v5, v0, 0, 0, 0, 0 ; D282000C 04020A07 V_CUBESC_F32 v1, v9, v10, v11, 0, 0, 0, 0 ; D28A0001 042E1509 V_CUBETC_F32 v0, v9, v10, v11, 0, 0, 0, 0 ; D28C0000 042E1509 V_CUBEMA_F32 v2, v9, v10, v11, 0, 0, 0, 0 ; D28E0002 042E1509 V_CUBEID_F32 v3, v9, v10, v11, 0, 0, 0, 0 ; D2880003 042E1509 V_MOV_B32_e32 v8, 2147483647 ; 7E1002FF 7FFFFFFF V_AND_B32_e32 v13, v2, v8 ; 361A1102 V_RCP_F32_e32 v13, v13 ; 7E1A550D V_MOV_B32_e32 v14, 1.500000e+00 ; 7E1C02FF 3FC00000 V_MAD_F32 v2, v0, v13, v14, 0, 0, 0, 0 ; D2820002 043A1B00 V_MAD_F32 v1, v1, v13, v14, 0, 0, 0, 0 ; D2820001 043A1B01 S_LOAD_DWORDX4 s[8:11], s[2:3], 0 ; C0840300 S_LOAD_DWORDX8 s[12:19], s[4:5], 0 ; C0C60500 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[1:4], s[12:19], s[8:11] ; F0800F00 00430001 S_LOAD_DWORDX4 s[8:11], s[0:1], 0 ; C0840100 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s0, s[8:11], 12 ; C200090C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s0 ; 7E080200 V_MUL_F32_e64 v5, s0, v4, 0, 0, 0, 0 ; D2100005 02020800 V_MUL_F32_e32 v6, 5.000000e-01, v5 ; 100C0AF0 V_MUL_F32_e32 v7, v3, v6 ; 100E0D03 V_CUBESC_F32 v16, v9, v10, v11, 0, 0, 0, 0 ; D28A0010 042E1509 V_CUBETC_F32 v15, v9, v10, v11, 0, 0, 0, 0 ; D28C000F 042E1509 V_CUBEMA_F32 v17, v9, v10, v11, 0, 0, 0, 0 ; D28E0011 042E1509 V_CUBEID_F32 v18, v9, v10, v11, 0, 0, 0, 0 ; D2880012 042E1509 V_AND_B32_e32 v13, v17, v8 ; 361A1111 V_RCP_F32_e32 v13, v13 ; 7E1A550D V_MAD_F32 v17, v15, v13, v14, 0, 0, 0, 0 ; D2820011 043A1B0F V_MAD_F32 v16, v16, v13, v14, 0, 0, 0, 0 ; D2820010 043A1B10 S_LOAD_DWORDX4 s[8:11], s[2:3], 4 ; C0840304 S_LOAD_DWORDX8 s[12:19], s[4:5], 8 ; C0C60508 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[16:19], s[12:19], s[8:11] ; F0800F00 00430F10 V_ADD_F32_e64 v4, s0, v4, 0, 0, 0, 0 ; D2060004 02020800 V_MUL_F32_e64 v13, s0, -2.000000e+00, 0, 0, 0, 0 ; D210000D 0201EA00 V_MAD_F32 v13, v13, s0, v4, 0, 0, 0, 0 ; D282000D 0410010D V_ADD_F32_e32 v13, 1.000000e+00, v13 ; 061A1AF2 V_MUL_F32_e32 v13, 5.000000e-01, v13 ; 101A1AF0 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v7, v18, v13, v7, 0, 0, 0, 0 ; D2820007 041E1B12 V_MUL_F32_e32 v19, v2, v6 ; 10260D02 V_MAD_F32 v19, v17, v13, v19, 0, 0, 0, 0 ; D2820013 044E1B11 V_MOV_B32_e32 v12, s6 ; 7E180206 V_CUBESC_F32 v21, v9, v10, v11, 0, 0, 0, 0 ; D28A0015 042E1509 V_CUBETC_F32 v20, v9, v10, v11, 0, 0, 0, 0 ; D28C0014 042E1509 V_CUBEMA_F32 v22, v9, v10, v11, 0, 0, 0, 0 ; D28E0016 042E1509 V_CUBEID_F32 v23, v9, v10, v11, 0, 0, 0, 0 ; D2880017 042E1509 V_AND_B32_e32 v8, v22, v8 ; 36101116 V_RCP_F32_e32 v8, v8 ; 7E105508 V_MAD_F32 v22, v20, v8, v14, 0, 0, 0, 0 ; D2820016 043A1114 V_MAD_F32 v21, v21, v8, v14, 0, 0, 0, 0 ; D2820015 043A1115 S_LOAD_DWORDX4 s[0:3], s[2:3], 8 ; C0800308 S_LOAD_DWORDX8 s[4:11], s[4:5], 16 ; C0C20510 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[8:10], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[4:11], s[0:3] ; F0800700 00010815 V_SUB_F32_e32 v4, v5, v4 ; 08080905 V_ADD_F32_e32 v4, 1.000000e+00, v4 ; 060808F2 V_MUL_F32_e32 v4, 5.000000e-01, v4 ; 100808F0 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v5, v10, v4, v19, 0, 0, 0, 0 ; D2820005 044E090A V_CVT_PKRTZ_F16_F32_e32 v5, v5, v7 ; 5E0A0F05 V_MUL_F32_e32 v7, v1, v6 ; 100E0D01 V_MAD_F32 v7, v16, v13, v7, 0, 0, 0, 0 ; D2820007 041E1B10 V_MAD_F32 v7, v9, v4, v7, 0, 0, 0, 0 ; D2820007 041E0909 V_MUL_F32_e32 v0, v0, v6 ; 10000D00 V_MAD_F32 v0, v15, v13, v0, 0, 0, 0, 0 ; D2820000 04021B0F V_MAD_F32 v0, v8, v4, v0, 0, 0, 0, 0 ; D2820000 04020908 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v7 ; 5E000F00 EXP 15, 0, 1, 1, 1, v0, v5, v0, v5 ; F8001C0F 05000500 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[20] DCL CONST[1][0..96] DCL CONST[2][0..68] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MOV TEMP[1].xy, IN[0].xyxx 3: MOV OUT[1], TEMP[1] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 0.000000e+00 ; 7E080280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v0, v1, v4, v4 ; F800020F 04040100 S_WAITCNT expcnt(0) ; BF8C070F V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 EXP 15, 12, 0, 1, 0, v0, v1, v2, v4 ; F80008CF 04020100 S_ENDPGM ; BF810000 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %22, float %23, float %24, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_MOV_F32 v0, P0, 3, 0, [m0] ; C8020302 V_INTERP_MOV_F32 v1, P0, 2, 0, [m0] ; C8060202 V_INTERP_MOV_F32 v2, P0, 1, 0, [m0] ; C80A0102 V_INTERP_MOV_F32 v3, P0, 0, 0, [m0] ; C80E0002 EXP 15, 0, 0, 1, 1, v3, v2, v1, v0 ; F800180F 00010203 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[2] DCL TEMP[0] DCL TEMP[1..3], LOCAL 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[2].xxxx, CONST[2].yyyy 2: MOV TEMP[1].xy, TEMP[0].xyyy 3: TEX TEMP[1], TEMP[1], SAMP[0], RECT 4: MOV TEMP[2].xy, TEMP[0].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[1], RECT 6: MOV_SAT TEMP[3].x, TEMP[1].wwww 7: MAD TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[1].xyzz 8: MOV OUT[0], TEMP[1] 9: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %27 = load <32 x i8> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1 %31 = load <32 x i8> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = fmul float %15, %24 %35 = fadd float %34, %25 %36 = bitcast float %14 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %27, <16 x i8> %29, i32 5) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = bitcast float %14 to i32 %46 = bitcast float %35 to i32 %47 = insertelement <2 x i32> undef, i32 %45, i32 0 %48 = insertelement <2 x i32> %47, i32 %46, i32 1 %49 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %31, <16 x i8> %33, i32 5) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = call float @llvm.AMDIL.clamp.(float %44, float 0.000000e+00, float 1.000000e+00) %54 = fmul float %50, %53 %55 = fadd float %54, %41 %56 = fmul float %51, %53 %57 = fadd float %56, %42 %58 = fmul float %52, %53 %59 = fadd float %58, %43 %60 = call i32 @llvm.SI.packf16(float %55, float %57) %61 = bitcast i32 %60 to float %62 = call i32 @llvm.SI.packf16(float %59, float %44) %63 = bitcast i32 %62 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %61, float %63, float %61, float %63) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_MOV_B32_e32 v0, v2 ; 7E000302 S_LOAD_DWORDX4 s[8:11], s[0:1], 0 ; C0840100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[8:11], 8 ; C2000908 S_BUFFER_LOAD_DWORD s1, s[8:11], 9 ; C2008909 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s1 ; 7E040201 V_MAD_F32 v1, v3, s0, v2, 0, 0, 0, 0 ; D2820001 04080103 S_LOAD_DWORDX4 s[8:11], s[2:3], 4 ; C0840304 S_LOAD_DWORDX8 s[12:19], s[4:5], 8 ; C0C60508 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:4], 7, -1, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[8:11] ; F0801700 00430200 S_LOAD_DWORDX4 s[0:3], s[2:3], 0 ; C0800300 S_LOAD_DWORDX8 s[4:11], s[4:5], 0 ; C0C20500 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[5:8], 15, -1, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[0:3] ; F0801F00 00010500 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e64 v0, 0, v8, 0, 1, 0, 0 ; D2060800 02021080 V_MAD_F32 v1, v3, v0, v6, 0, 0, 0, 0 ; D2820001 041A0103 V_MAD_F32 v9, v2, v0, v5, 0, 0, 0, 0 ; D2820009 04160102 V_CVT_PKRTZ_F16_F32_e32 v1, v9, v1 ; 5E020309 V_MAD_F32 v0, v4, v0, v7, 0, 0, 0, 0 ; D2820000 041E0104 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v8 ; 5E001100 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v4 ; F80008CF 04020100 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[10] DCL CONST[0..7] DCL CONST[1][0..96] DCL CONST[2][0..68] DCL TEMP[0] DCL TEMP[1..37], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 0.5000, 2.0000, 10.0000} IMM[1] UINT32 {1, 192, 0, 160} IMM[2] INT32 {12, 10, 9, 8} IMM[3] UINT32 {144, 128, 352, 208} IMM[4] INT32 {22, 13, 0, 1} IMM[5] FLT32 { 100.0000, 1.3000, 1.0000, -1.0000} IMM[6] FLT32 { 0.7000, 0.0010, 2.2000, 35.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[10].xxxx, CONST[10].yyyy 2: MOV TEMP[1].xyz, IMM[0].xxxx 3: UARL ADDR[0].x, IMM[2].xxxx 4: UARL ADDR[0].x, IMM[2].xxxx 5: MOV TEMP[2].xyz, CONST[2][ADDR[0].x].xyzz 6: UARL ADDR[0].x, IMM[2].yyyy 7: MOV TEMP[3], CONST[1][ADDR[0].x] 8: UARL ADDR[0].x, IMM[2].zzzz 9: MOV TEMP[4], CONST[1][ADDR[0].x] 10: UARL ADDR[0].x, IMM[2].wwww 11: MOV TEMP[5], CONST[1][ADDR[0].x] 12: MUL TEMP[5], TEMP[5], TEMP[2].xxxx 13: MAD TEMP[4], TEMP[4], TEMP[2].yyyy, TEMP[5] 14: MAD TEMP[2], TEMP[3], TEMP[2].zzzz, TEMP[4] 15: MOV TEMP[3], TEMP[2] 16: FSLT TEMP[4].x, IMM[0].xxxx, TEMP[2].zzzz 17: UIF TEMP[4].xxxx :2 18: RCP TEMP[4].x, TEMP[2].wwww 19: MUL TEMP[2], TEMP[2], TEMP[4].xxxx 20: MOV TEMP[3].zw, TEMP[2].wwzw 21: MAD TEMP[2].xy, TEMP[2].xyyy, IMM[0].yyyy, IMM[0].yyyy 22: UARL ADDR[0].x, IMM[4].xxxx 23: MOV TEMP[4].xy, CONST[1][ADDR[0].x].xyyy 24: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[4].xyyy 25: MUL TEMP[3].xy, TEMP[2].xyyy, IMM[0].yyyy 26: FSLT TEMP[2].x, IMM[0].xxxx, CONST[7].xxxx 27: UIF TEMP[2].xxxx :2 28: MOV TEMP[2].xy, TEMP[3].xyxx 29: ADD TEMP[4].xy, TEMP[0].xyyy, -TEMP[3].xyyy 30: MUL TEMP[5].xy, TEMP[0].xyyy, IMM[0].zzzz 31: UARL ADDR[0].x, IMM[4].yyyy 32: MOV TEMP[6].xyz, CONST[2][ADDR[0].x].xyzz 33: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy 34: RSQ TEMP[7].x, TEMP[4].xxxx 35: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[4].xxxx 36: CMP TEMP[7].x, -TEMP[4].xxxx, TEMP[7].xxxx, IMM[0].xxxx 37: ADD TEMP[4].x, TEMP[7].xxxx, IMM[5].xxxx 38: POW TEMP[4].x, TEMP[4].xxxx, IMM[5].yyyy 39: ADD TEMP[4].x, IMM[5].xxxx, TEMP[4].xxxx 40: RCP TEMP[4].x, TEMP[4].xxxx 41: MUL TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx 42: ADD TEMP[7].xy, TEMP[5].xyyy, IMM[5].zzzz 43: MOV TEMP[7].xy, TEMP[7].xyyy 44: TEX TEMP[7], TEMP[7], SAMP[1], RECT 45: FSEQ TEMP[7].x, TEMP[7].xxxx, IMM[5].zzzz 46: AND TEMP[7].x, TEMP[7].xxxx, IMM[5].zzzz 47: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx 48: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx 49: ADD TEMP[5].xy, TEMP[5].xyyy, IMM[5].zzzz 50: MOV TEMP[5].xy, TEMP[5].xyyy 51: TEX TEMP[5], TEMP[5], SAMP[0], RECT 52: MOV_SAT TEMP[5].x, TEMP[5].wwww 53: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 54: MOV TEMP[5].x, IMM[4].zzzz 55: BGNLOOP :2 56: ISGE TEMP[6].x, TEMP[5].xxxx, CONST[6].xxxx 57: UIF TEMP[6].xxxx :2 58: BRK 59: ENDIF 60: I2F TEMP[7].x, TEMP[5].xxxx 61: I2F TEMP[8].x, CONST[6].xxxx 62: ADD TEMP[9].x, TEMP[8].xxxx, IMM[5].wwww 63: RCP TEMP[10].x, TEMP[9].xxxx 64: MUL TEMP[11].x, TEMP[7].xxxx, TEMP[10].xxxx 65: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx 66: ADD TEMP[13].xy, TEMP[2].xyyy, -TEMP[0].xyyy 67: MUL TEMP[14].xy, TEMP[12].xxxx, TEMP[13].xyyy 68: MUL TEMP[15].xy, TEMP[14].xyyy, CONST[3].xxxx 69: MAD TEMP[16].xy, TEMP[15].xyyy, IMM[6].xxxx, TEMP[0].xyyy 70: ADD TEMP[17].xy, TEMP[0].xyyy, -TEMP[2].xyyy 71: UARL ADDR[0].x, IMM[4].yyyy 72: MOV TEMP[18].xyz, CONST[2][ADDR[0].x].xyzz 73: DP2 TEMP[19].x, TEMP[17].xyyy, TEMP[17].xyyy 74: RSQ TEMP[20].x, TEMP[19].xxxx 75: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx 76: CMP TEMP[20].x, -TEMP[19].xxxx, TEMP[20].xxxx, IMM[0].xxxx 77: ADD TEMP[21].x, TEMP[20].xxxx, IMM[5].xxxx 78: POW TEMP[22].x, TEMP[21].xxxx, IMM[5].yyyy 79: ADD TEMP[23].x, IMM[5].xxxx, TEMP[22].xxxx 80: RCP TEMP[24].x, TEMP[23].xxxx 81: MUL TEMP[25].x, IMM[0].wwww, TEMP[24].xxxx 82: MAD TEMP[26].xy, TEMP[16].xyyy, IMM[0].zzzz, IMM[5].zzzz 83: MOV TEMP[27].xy, TEMP[26].xyyy 84: TEX TEMP[28], TEMP[27], SAMP[1], RECT 85: FSEQ TEMP[29].x, TEMP[28].xxxx, IMM[5].zzzz 86: AND TEMP[30].x, TEMP[29].xxxx, IMM[5].zzzz 87: MUL TEMP[31].x, TEMP[25].xxxx, TEMP[30].xxxx 88: MUL TEMP[32].xyz, TEMP[18].xyzz, TEMP[31].xxxx 89: MAD TEMP[33].xy, TEMP[16].xyyy, IMM[0].zzzz, IMM[5].zzzz 90: MOV TEMP[34].xy, TEMP[33].xyyy 91: TEX TEMP[35], TEMP[34], SAMP[0], RECT 92: MOV_SAT TEMP[36].x, TEMP[35].wwww 93: MUL TEMP[37].xyz, TEMP[32].xyzz, TEMP[36].xxxx 94: MAD TEMP[4].xyz, TEMP[37].xyzz, CONST[4].xxxx, TEMP[4].xyzz 95: UADD TEMP[5].x, TEMP[5].xxxx, IMM[4].wwww 96: ENDLOOP :2 97: I2F TEMP[2].x, CONST[6].xxxx 98: RCP TEMP[2].x, TEMP[2].xxxx 99: MUL TEMP[2].x, CONST[5].xxxx, TEMP[2].xxxx 100: MUL TEMP[2].xyz, TEMP[4].xyzz, TEMP[2].xxxx 101: MAX TEMP[2].xyz, IMM[6].yyyy, TEMP[2].xyzz 102: POW TEMP[4].x, TEMP[2].xxxx, IMM[6].zzzz 103: POW TEMP[4].y, TEMP[2].yyyy, IMM[6].zzzz 104: POW TEMP[4].z, TEMP[2].zzzz, IMM[6].zzzz 105: MOV TEMP[1].xyz, TEMP[4].xyzx 106: ELSE :2 107: ADD TEMP[2].xy, TEMP[0].xyyy, -TEMP[3].xyyy 108: MUL TEMP[3].xy, TEMP[0].xyyy, IMM[0].zzzz 109: UARL ADDR[0].x, IMM[4].yyyy 110: MOV TEMP[4].xyz, CONST[2][ADDR[0].x].xyzz 111: ADD TEMP[5].xy, TEMP[3].xyyy, IMM[5].zzzz 112: MOV TEMP[5].xy, TEMP[5].xyyy 113: TEX TEMP[5].x, TEMP[5], SAMP[1], RECT 114: FSEQ TEMP[5].x, TEMP[5].xxxx, IMM[5].zzzz 115: AND TEMP[5].x, TEMP[5].xxxx, IMM[5].zzzz 116: MUL TEMP[5].x, IMM[6].wwww, TEMP[5].xxxx 117: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx 118: ADD TEMP[3].xy, TEMP[3].xyyy, IMM[5].zzzz 119: MOV TEMP[3].xy, TEMP[3].xyyy 120: TEX TEMP[3].w, TEMP[3], SAMP[0], RECT 121: MOV_SAT TEMP[3].x, TEMP[3].wwww 122: MUL TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].xxxx 123: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy 124: RSQ TEMP[4].x, TEMP[2].xxxx 125: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[2].xxxx 126: CMP TEMP[4].x, -TEMP[2].xxxx, TEMP[4].xxxx, IMM[0].xxxx 127: ADD TEMP[2].x, TEMP[4].xxxx, IMM[5].xxxx 128: POW TEMP[2].x, TEMP[2].xxxx, IMM[5].yyyy 129: ADD TEMP[2].x, IMM[5].xxxx, TEMP[2].xxxx 130: RCP TEMP[2].x, TEMP[2].xxxx 131: MAD TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].xxxx, TEMP[1].xyzz 132: ENDIF 133: ENDIF 134: MOV OUT[0].xyz, TEMP[1].xyzx 135: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %31 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 1 %32 = load <16 x i8> addrspace(2)* %31, !tbaa !0 %33 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 2 %34 = load <16 x i8> addrspace(2)* %33, !tbaa !0 %35 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %36 = load <32 x i8> addrspace(2)* %35, !tbaa !0 %37 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1 %40 = load <32 x i8> addrspace(2)* %39, !tbaa !0 %41 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = fmul float %15, %29 %44 = fadd float %43, %30 %45 = shl i32 12, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %34, i32 %45) %47 = shl i32 12, 4 %48 = add i32 %47, 4 %49 = call float @llvm.SI.load.const(<16 x i8> %34, i32 %48) %50 = shl i32 12, 4 %51 = add i32 %50, 8 %52 = call float @llvm.SI.load.const(<16 x i8> %34, i32 %51) %53 = shl i32 10, 4 %54 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %53) %55 = shl i32 10, 4 %56 = add i32 %55, 4 %57 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %56) %58 = shl i32 10, 4 %59 = add i32 %58, 8 %60 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %59) %61 = shl i32 10, 4 %62 = add i32 %61, 12 %63 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %62) %64 = shl i32 9, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %64) %66 = shl i32 9, 4 %67 = add i32 %66, 4 %68 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %67) %69 = shl i32 9, 4 %70 = add i32 %69, 8 %71 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %70) %72 = shl i32 9, 4 %73 = add i32 %72, 12 %74 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %73) %75 = shl i32 8, 4 %76 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %75) %77 = shl i32 8, 4 %78 = add i32 %77, 4 %79 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %78) %80 = shl i32 8, 4 %81 = add i32 %80, 8 %82 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %81) %83 = shl i32 8, 4 %84 = add i32 %83, 12 %85 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %84) %86 = fmul float %76, %46 %87 = fmul float %79, %46 %88 = fmul float %82, %46 %89 = fmul float %85, %46 %90 = fmul float %65, %49 %91 = fadd float %90, %86 %92 = fmul float %68, %49 %93 = fadd float %92, %87 %94 = fmul float %71, %49 %95 = fadd float %94, %88 %96 = fmul float %74, %49 %97 = fadd float %96, %89 %98 = fmul float %54, %52 %99 = fadd float %98, %91 %100 = fmul float %57, %52 %101 = fadd float %100, %93 %102 = fmul float %60, %52 %103 = fadd float %102, %95 %104 = fmul float %63, %52 %105 = fadd float %104, %97 %106 = fcmp olt float 0.000000e+00, %103 %107 = sext i1 %106 to i32 %108 = bitcast i32 %107 to float %109 = bitcast float %108 to i32 %110 = icmp ne i32 %109, 0 br i1 %110, label %IF, label %ENDIF IF: ; preds = %main_body %111 = fdiv float 1.000000e+00, %105 %112 = fmul float %99, %111 %113 = fmul float %101, %111 %114 = fmul float %112, 5.000000e-01 %115 = fadd float %114, 5.000000e-01 %116 = fmul float %113, 5.000000e-01 %117 = fadd float %116, 5.000000e-01 %118 = shl i32 22, 4 %119 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %118) %120 = shl i32 22, 4 %121 = add i32 %120, 4 %122 = call float @llvm.SI.load.const(<16 x i8> %32, i32 %121) %123 = fmul float %115, %119 %124 = fmul float %117, %122 %125 = fmul float %123, 5.000000e-01 %126 = fmul float %124, 5.000000e-01 %127 = fcmp olt float 0.000000e+00, %28 %128 = sext i1 %127 to i32 %129 = bitcast i32 %128 to float %130 = bitcast float %129 to i32 %131 = icmp ne i32 %130, 0 %132 = fsub float -0.000000e+00, %125 %133 = fadd float %14, %132 %134 = fsub float -0.000000e+00, %126 %135 = fadd float %44, %134 %136 = fmul float %14, 2.000000e+00 %137 = fmul float %44, 2.000000e+00 %138 = shl i32 13, 4 %139 = call float @llvm.SI.load.const(<16 x i8> %34, i32 %138) %140 = shl i32 13, 4 %141 = add i32 %140, 4 %142 = call float @llvm.SI.load.const(<16 x i8> %34, i32 %141) %143 = shl i32 13, 4 %144 = add i32 %143, 8 %145 = call float @llvm.SI.load.const(<16 x i8> %34, i32 %144) br i1 %131, label %IF169, label %ELSE170 ENDIF: ; preds = %IF175, %ELSE170, %main_body %temp6.0 = phi float [ 0.000000e+00, %main_body ], [ %277, %IF175 ], [ %255, %ELSE170 ] %temp5.0 = phi float [ 0.000000e+00, %main_body ], [ %276, %IF175 ], [ %253, %ELSE170 ] %temp4.0 = phi float [ 0.000000e+00, %main_body ], [ %275, %IF175 ], [ %251, %ELSE170 ] %146 = call i32 @llvm.SI.packf16(float %temp4.0, float %temp5.0) %147 = bitcast i32 %146 to float %148 = call i32 @llvm.SI.packf16(float %temp6.0, float 0.000000e+00) %149 = bitcast i32 %148 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %147, float %149, float %147, float %149) ret void IF169: ; preds = %IF %150 = fmul float %133, %133 %151 = fmul float %135, %135 %152 = fadd float %150, %151 %153 = call float @llvm.AMDGPU.rsq(float %152) %154 = fmul float %153, %152 %155 = fsub float -0.000000e+00, %152 %156 = call float @llvm.AMDGPU.cndlt(float %155, float %154, float 0.000000e+00) %157 = fadd float %156, 1.000000e+02 %158 = call float @llvm.pow.f32(float %157, float 0x3FF4CCCCC0000000) %159 = fadd float 1.000000e+02, %158 %160 = fdiv float 1.000000e+00, %159 %161 = fmul float 1.000000e+01, %160 %162 = fadd float %136, 1.000000e+00 %163 = fadd float %137, 1.000000e+00 %164 = bitcast float %162 to i32 %165 = bitcast float %163 to i32 %166 = insertelement <2 x i32> undef, i32 %164, i32 0 %167 = insertelement <2 x i32> %166, i32 %165, i32 1 %168 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %167, <32 x i8> %40, <16 x i8> %42, i32 5) %169 = extractelement <4 x float> %168, i32 0 %170 = fcmp oeq float %169, 1.000000e+00 %171 = sext i1 %170 to i32 %172 = bitcast i32 %171 to float %173 = bitcast float %172 to i32 %174 = and i32 %173, 1065353216 %175 = bitcast i32 %174 to float %176 = fmul float %161, %175 %177 = fmul float %139, %176 %178 = fmul float %142, %176 %179 = fmul float %145, %176 %180 = fadd float %136, 1.000000e+00 %181 = fadd float %137, 1.000000e+00 %182 = bitcast float %180 to i32 %183 = bitcast float %181 to i32 %184 = insertelement <2 x i32> undef, i32 %182, i32 0 %185 = insertelement <2 x i32> %184, i32 %183, i32 1 %186 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %185, <32 x i8> %36, <16 x i8> %38, i32 5) %187 = extractelement <4 x float> %186, i32 3 %188 = call float @llvm.AMDIL.clamp.(float %187, float 0.000000e+00, float 1.000000e+00) %189 = fmul float %177, %188 %190 = fmul float %178, %188 %191 = fmul float %179, %188 %192 = bitcast float %27 to i32 %193 = bitcast float %27 to i32 %194 = sitofp i32 %193 to float %195 = fadd float %194, -1.000000e+00 %196 = fdiv float 1.000000e+00, %195 %197 = fsub float -0.000000e+00, %14 %198 = fadd float %125, %197 %199 = fsub float -0.000000e+00, %44 %200 = fadd float %126, %199 %201 = fsub float -0.000000e+00, %125 %202 = fadd float %14, %201 %203 = fsub float -0.000000e+00, %126 %204 = fadd float %44, %203 %205 = fmul float %202, %202 %206 = fmul float %204, %204 %207 = fadd float %205, %206 %208 = fsub float -0.000000e+00, %207 br label %LOOP ELSE170: ; preds = %IF %209 = fadd float %136, 1.000000e+00 %210 = fadd float %137, 1.000000e+00 %211 = bitcast float %209 to i32 %212 = bitcast float %210 to i32 %213 = insertelement <2 x i32> undef, i32 %211, i32 0 %214 = insertelement <2 x i32> %213, i32 %212, i32 1 %215 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %214, <32 x i8> %40, <16 x i8> %42, i32 5) %216 = extractelement <4 x float> %215, i32 0 %217 = fcmp oeq float %216, 1.000000e+00 %218 = sext i1 %217 to i32 %219 = bitcast i32 %218 to float %220 = bitcast float %219 to i32 %221 = and i32 %220, 1065353216 %222 = bitcast i32 %221 to float %223 = fmul float 3.500000e+01, %222 %224 = fmul float %139, %223 %225 = fmul float %142, %223 %226 = fmul float %145, %223 %227 = fadd float %136, 1.000000e+00 %228 = fadd float %137, 1.000000e+00 %229 = bitcast float %227 to i32 %230 = bitcast float %228 to i32 %231 = insertelement <2 x i32> undef, i32 %229, i32 0 %232 = insertelement <2 x i32> %231, i32 %230, i32 1 %233 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %232, <32 x i8> %36, <16 x i8> %38, i32 5) %234 = extractelement <4 x float> %233, i32 3 %235 = call float @llvm.AMDIL.clamp.(float %234, float 0.000000e+00, float 1.000000e+00) %236 = fmul float %224, %235 %237 = fmul float %225, %235 %238 = fmul float %226, %235 %239 = fmul float %133, %133 %240 = fmul float %135, %135 %241 = fadd float %239, %240 %242 = call float @llvm.AMDGPU.rsq(float %241) %243 = fmul float %242, %241 %244 = fsub float -0.000000e+00, %241 %245 = call float @llvm.AMDGPU.cndlt(float %244, float %243, float 0.000000e+00) %246 = fadd float %245, 1.000000e+02 %247 = call float @llvm.pow.f32(float %246, float 0x3FF4CCCCC0000000) %248 = fadd float 1.000000e+02, %247 %249 = fdiv float 1.000000e+00, %248 %250 = fmul float %236, %249 %251 = fadd float %250, 0.000000e+00 %252 = fmul float %237, %249 %253 = fadd float %252, 0.000000e+00 %254 = fmul float %238, %249 %255 = fadd float %254, 0.000000e+00 br label %ENDIF LOOP: ; preds = %ENDIF174, %IF169 %temp20.0 = phi float [ 0.000000e+00, %IF169 ], [ %343, %ENDIF174 ] %temp18.0 = phi float [ %191, %IF169 ], [ %340, %ENDIF174 ] %temp17.0 = phi float [ %190, %IF169 ], [ %338, %ENDIF174 ] %temp16.0 = phi float [ %189, %IF169 ], [ %336, %ENDIF174 ] %256 = bitcast float %temp20.0 to i32 %257 = icmp sge i32 %256, %192 %258 = sext i1 %257 to i32 %259 = bitcast i32 %258 to float %260 = bitcast float %259 to i32 %261 = icmp ne i32 %260, 0 br i1 %261, label %IF175, label %ENDIF174 IF175: ; preds = %LOOP %temp16.0.lcssa = phi float [ %temp16.0, %LOOP ] %temp17.0.lcssa = phi float [ %temp17.0, %LOOP ] %temp18.0.lcssa = phi float [ %temp18.0, %LOOP ] %262 = bitcast float %27 to i32 %263 = sitofp i32 %262 to float %264 = fdiv float 1.000000e+00, %263 %265 = fmul float %26, %264 %266 = fmul float %temp16.0.lcssa, %265 %267 = fmul float %temp17.0.lcssa, %265 %268 = fmul float %temp18.0.lcssa, %265 %269 = fcmp uge float 0x3F50624DE0000000, %266 %270 = select i1 %269, float 0x3F50624DE0000000, float %266 %271 = fcmp uge float 0x3F50624DE0000000, %267 %272 = select i1 %271, float 0x3F50624DE0000000, float %267 %273 = fcmp uge float 0x3F50624DE0000000, %268 %274 = select i1 %273, float 0x3F50624DE0000000, float %268 %275 = call float @llvm.pow.f32(float %270, float 0x40019999A0000000) %276 = call float @llvm.pow.f32(float %272, float 0x40019999A0000000) %277 = call float @llvm.pow.f32(float %274, float 0x40019999A0000000) br label %ENDIF ENDIF174: ; preds = %LOOP %278 = bitcast float %temp20.0 to i32 %279 = sitofp i32 %278 to float %280 = fmul float %279, %196 %281 = fmul float %280, %280 %282 = fmul float %281, %198 %283 = fmul float %281, %200 %284 = fmul float %282, %24 %285 = fmul float %283, %24 %286 = fmul float %284, 0x3FE6666660000000 %287 = fadd float %286, %14 %288 = fmul float %285, 0x3FE6666660000000 %289 = fadd float %288, %44 %290 = call float @llvm.SI.load.const(<16 x i8> %34, i32 208) %291 = call float @llvm.SI.load.const(<16 x i8> %34, i32 212) %292 = call float @llvm.SI.load.const(<16 x i8> %34, i32 216) %293 = call float @llvm.AMDGPU.rsq(float %207) %294 = fmul float %293, %207 %295 = call float @llvm.AMDGPU.cndlt(float %208, float %294, float 0.000000e+00) %296 = fadd float %295, 1.000000e+02 %297 = call float @llvm.pow.f32(float %296, float 0x3FF4CCCCC0000000) %298 = fadd float 1.000000e+02, %297 %299 = fdiv float 1.000000e+00, %298 %300 = fmul float 1.000000e+01, %299 %301 = fmul float %287, 2.000000e+00 %302 = fadd float %301, 1.000000e+00 %303 = fmul float %289, 2.000000e+00 %304 = fadd float %303, 1.000000e+00 %305 = bitcast float %302 to i32 %306 = bitcast float %304 to i32 %307 = insertelement <2 x i32> undef, i32 %305, i32 0 %308 = insertelement <2 x i32> %307, i32 %306, i32 1 %309 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %308, <32 x i8> %40, <16 x i8> %42, i32 5) %310 = extractelement <4 x float> %309, i32 0 %311 = fcmp oeq float %310, 1.000000e+00 %312 = sext i1 %311 to i32 %313 = bitcast i32 %312 to float %314 = bitcast float %313 to i32 %315 = and i32 %314, 1065353216 %316 = bitcast i32 %315 to float %317 = fmul float %300, %316 %318 = fmul float %290, %317 %319 = fmul float %291, %317 %320 = fmul float %292, %317 %321 = fmul float %287, 2.000000e+00 %322 = fadd float %321, 1.000000e+00 %323 = fmul float %289, 2.000000e+00 %324 = fadd float %323, 1.000000e+00 %325 = bitcast float %322 to i32 %326 = bitcast float %324 to i32 %327 = insertelement <2 x i32> undef, i32 %325, i32 0 %328 = insertelement <2 x i32> %327, i32 %326, i32 1 %329 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %328, <32 x i8> %36, <16 x i8> %38, i32 5) %330 = extractelement <4 x float> %329, i32 3 %331 = call float @llvm.AMDIL.clamp.(float %330, float 0.000000e+00, float 1.000000e+00) %332 = fmul float %318, %331 %333 = fmul float %319, %331 %334 = fmul float %320, %331 %335 = fmul float %332, %25 %336 = fadd float %335, %temp16.0 %337 = fmul float %333, %25 %338 = fadd float %337, %temp17.0 %339 = fmul float %334, %25 %340 = fadd float %339, %temp18.0 %341 = bitcast float %temp20.0 to i32 %342 = add i32 %341, 1 %343 = bitcast i32 %342 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[8:11], s[0:1], 4 ; C0840104 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s6, s[8:11], 34 ; C2030922 S_LOAD_DWORDX4 s[12:15], s[0:1], 8 ; C0860108 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s16, s[12:15], 48 ; C2080D30 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s16 ; 7E000210 V_MUL_F32_e64 v0, s6, v0, 0, 0, 0, 0 ; D2100000 02020006 S_BUFFER_LOAD_DWORD s6, s[8:11], 38 ; C2030926 S_BUFFER_LOAD_DWORD s17, s[12:15], 49 ; C2088D31 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s17 ; 7E020211 V_MAD_F32 v0, s6, v1, v0, 0, 0, 0, 0 ; D2820000 04020206 S_BUFFER_LOAD_DWORD s6, s[8:11], 42 ; C203092A S_BUFFER_LOAD_DWORD s18, s[12:15], 50 ; C2090D32 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v1, s18 ; 7E020212 V_MAD_F32 v0, s6, v1, v0, 0, 0, 0, 0 ; D2820000 04020206 V_CMP_GT_F32_e64 s[6:7], v0, 0.000000e+00, 0, 0, 0, 0 ; D0080006 02010100 V_MOV_B32_e32 v8, 0.000000e+00 ; 7E100280 V_MOV_B32_e32 v9, v8 ; 7E120308 V_MOV_B32_e32 v10, v8 ; 7E140308 S_AND_SAVEEXEC_B64 s[6:7], s[6:7] ; BE862406 S_XOR_B64 s[6:7], exec, s[6:7] ; 8986067E S_CBRANCH_EXECZ BB0_2 ; BF880000 S_LOAD_DWORDX4 s[36:39], s[0:1], 0 ; C0920100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[36:39], 40 ; C2002528 S_BUFFER_LOAD_DWORD s1, s[36:39], 41 ; C200A529 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s1 ; 7E000201 V_MAD_F32 v0, v3, s0, v0, 0, 0, 0, 0 ; D2820000 04000103 V_MOV_B32_e32 v3, s16 ; 7E060210 S_BUFFER_LOAD_DWORD s0, s[8:11], 33 ; C2000921 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e64 v1, s0, v3, 0, 0, 0, 0 ; D2100001 02020600 V_MOV_B32_e32 v4, s17 ; 7E080211 S_BUFFER_LOAD_DWORD s0, s[8:11], 37 ; C2000925 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, s0, v4, v1, 0, 0, 0, 0 ; D2820001 04060800 V_MOV_B32_e32 v6, s18 ; 7E0C0212 S_BUFFER_LOAD_DWORD s0, s[8:11], 41 ; C2000929 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, s0, v6, v1, 0, 0, 0, 0 ; D2820001 04060C00 S_BUFFER_LOAD_DWORD s0, s[8:11], 35 ; C2000923 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e64 v5, s0, v3, 0, 0, 0, 0 ; D2100005 02020600 S_BUFFER_LOAD_DWORD s0, s[8:11], 39 ; C2000927 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, s0, v4, v5, 0, 0, 0, 0 ; D2820005 04160800 S_BUFFER_LOAD_DWORD s0, s[8:11], 43 ; C200092B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, s0, v6, v5, 0, 0, 0, 0 ; D2820005 04160C00 V_RCP_F32_e32 v7, v5 ; 7E0E5505 V_MUL_F32_e32 v1, v1, v7 ; 10020F01 V_MAD_F32 v1, v1, 5.000000e-01, 5.000000e-01, 0, 0, 0, 0 ; D2820001 03C1E101 S_BUFFER_LOAD_DWORD s0, s[8:11], 89 ; C2000959 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s0, v1 ; 10020200 V_MUL_F32_e32 v1, 5.000000e-01, v1 ; 100202F0 V_SUB_F32_e32 v5, v0, v1 ; 080A0300 S_BUFFER_LOAD_DWORD s0, s[8:11], 32 ; C2000920 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e64 v3, s0, v3, 0, 0, 0, 0 ; D2100003 02020600 S_BUFFER_LOAD_DWORD s0, s[8:11], 36 ; C2000924 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s0, v4, v3, 0, 0, 0, 0 ; D2820003 040E0800 S_BUFFER_LOAD_DWORD s0, s[8:11], 40 ; C2000928 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s0, v6, v3, 0, 0, 0, 0 ; D2820003 040E0C00 V_MUL_F32_e32 v3, v3, v7 ; 10060F03 V_MAD_F32 v3, v3, 5.000000e-01, 5.000000e-01, 0, 0, 0, 0 ; D2820003 03C1E103 S_BUFFER_LOAD_DWORD s0, s[8:11], 88 ; C2000958 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s0, v3 ; 10060600 V_MUL_F32_e32 v3, 5.000000e-01, v3 ; 100606F0 V_SUB_F32_e32 v7, v2, v3 ; 080E0702 V_ADD_F32_e32 v4, v0, v0 ; 06080100 S_BUFFER_LOAD_DWORD s8, s[12:15], 54 ; C2040D36 S_BUFFER_LOAD_DWORD s9, s[12:15], 53 ; C2048D35 S_BUFFER_LOAD_DWORD s10, s[12:15], 52 ; C2050D34 V_ADD_F32_e32 v6, v2, v2 ; 060C0502 S_LOAD_DWORDX4 s[12:15], s[2:3], 4 ; C0860304 S_LOAD_DWORDX8 s[16:23], s[4:5], 8 ; C0C80508 S_LOAD_DWORDX4 s[24:27], s[2:3], 0 ; C08C0300 S_LOAD_DWORDX8 s[28:35], s[4:5], 0 ; C0CE0500 S_BUFFER_LOAD_DWORD s0, s[36:39], 28 ; C200251C S_WAITCNT lgkmcnt(0) ; BF8C007F V_CMP_GT_F32_e64 s[0:1], s0, 0.000000e+00, 0, 0, 0, 0 ; D0080000 02010000 V_CNDMASK_B32_e64 v8, 0, -1, s[0:1], 0, 0, 0, 0 ; D2000008 00018280 V_CMP_EQ_I32_e64 s[0:1], v8, 0, 0, 0, 0, 0 ; D1040000 02010108 S_AND_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802400 S_XOR_B64 s[0:1], exec, s[0:1] ; 8980007E S_CBRANCH_EXECZ BB0_3 ; BF880000 V_ADD_F32_e64 v9, v4, 1.000000e+00, 0, 0, 0, 0 ; D2060009 0201E504 V_ADD_F32_e64 v8, v6, 1.000000e+00, 0, 0, 0, 0 ; D2060008 0201E506 IMAGE_SAMPLE v10, 1, -1, 0, 0, 0, 0, 0, 0, v[8:9], s[16:23], s[12:15] ; F0801100 00640A08 S_WAITCNT vmcnt(0) ; BF8C0770 V_CMP_EQ_F32_e64 s[2:3], v10, 1.000000e+00, 0, 0, 0, 0 ; D0040002 0201E50A V_CNDMASK_B32_e64 v10, 0, -1, s[2:3], 0, 0, 0, 0 ; D200000A 00098280 V_AND_B32_e32 v10, 1065353216, v10 ; 361414F2 V_MUL_F32_e32 v10, 3.500000e+01, v10 ; 101414FF 420C0000 V_MUL_F32_e32 v11, s8, v10 ; 10161408 IMAGE_SAMPLE v8, 8, -1, 0, 0, 0, 0, 0, 0, v[8:9], s[28:35], s[24:27] ; F0801800 00C70808 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e64 v12, 0, v8, 0, 1, 0, 0 ; D206080C 02021080 V_MUL_F32_e32 v8, v11, v12 ; 1010190B V_MUL_F32_e64 v9, v5, v5, 0, 0, 0, 0 ; D2100009 02020B05 V_MAD_F32 v9, v7, v7, v9, 0, 0, 0, 0 ; D2820009 04260F07 V_RSQ_LEGACY_F32_e32 v11, v9 ; 7E165B09 V_MUL_F32_e32 v11, v11, v9 ; 1016130B V_XOR_B32_e32 v9, -2147483648, v9 ; 3A1212FF 80000000 V_CMP_GT_F32_e64 s[2:3], 0, v9, 0, 0, 0, 0 ; D0080002 02021280 V_CNDMASK_B32_e64 v9, 0.000000e+00, v11, s[2:3], 0, 0, 0, 0 ; D2000009 000A1680 V_ADD_F32_e32 v9, 1.000000e+02, v9 ; 061212FF 42C80000 V_LOG_F32_e32 v9, v9 ; 7E124F09 V_MUL_LEGACY_F32_e32 v9, 1.300000e+00, v9 ; 0E1212FF 3FA66666 V_EXP_F32_e32 v9, v9 ; 7E124B09 V_ADD_F32_e32 v9, 1.000000e+02, v9 ; 061212FF 42C80000 V_RCP_F32_e32 v11, v9 ; 7E165509 V_MAD_F32 v8, v8, v11, 0.000000e+00, 0, 0, 0, 0 ; D2820008 02021708 V_MUL_F32_e32 v9, s9, v10 ; 10121409 V_MUL_F32_e32 v9, v9, v12 ; 10121909 V_MAD_F32 v9, v9, v11, 0.000000e+00, 0, 0, 0, 0 ; D2820009 02021709 V_MUL_F32_e32 v10, s10, v10 ; 1014140A V_MUL_F32_e32 v10, v10, v12 ; 1014190A V_MAD_F32 v10, v10, v11, 0.000000e+00, 0, 0, 0, 0 ; D282000A 0202170A S_OR_SAVEEXEC_B64 s[0:1], s[0:1] ; BE802500 S_WAITCNT expcnt(0) ; BF8C070F S_XOR_B64 exec, exec, s[0:1] ; 89FE007E S_CBRANCH_EXECZ BB0_6 ; BF880000 S_BUFFER_LOAD_DWORD s2, s[36:39], 24 ; C2012518 S_BUFFER_LOAD_DWORD s4, s[36:39], 16 ; C2022510 S_BUFFER_LOAD_DWORD s5, s[36:39], 12 ; C202A50C S_BUFFER_LOAD_DWORD s3, s[36:39], 20 ; C201A514 V_MUL_F32_e64 v5, v5, v5, 0, 0, 0, 0 ; D2100005 02020B05 V_MAD_F32 v5, v7, v7, v5, 0, 0, 0, 0 ; D2820005 04160F07 V_RSQ_LEGACY_F32_e32 v7, v5 ; 7E0E5B05 V_MUL_F32_e32 v7, v7, v5 ; 100E0B07 V_XOR_B32_e32 v5, -2147483648, v5 ; 3A0A0AFF 80000000 V_CMP_GT_F32_e64 s[36:37], 0, v5, 0, 0, 0, 0 ; D0080024 02020A80 V_CNDMASK_B32_e64 v5, 0.000000e+00, v7, s[36:37], 0, 0, 0, 0 ; D2000005 00920E80 V_ADD_F32_e32 v5, 1.000000e+02, v5 ; 060A0AFF 42C80000 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_LEGACY_F32_e32 v5, 1.300000e+00, v5 ; 0E0A0AFF 3FA66666 V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_ADD_F32_e32 v5, 1.000000e+02, v5 ; 060A0AFF 42C80000 V_RCP_F32_e32 v5, v5 ; 7E0A5505 V_MUL_F32_e32 v5, 1.000000e+01, v5 ; 100A0AFF 41200000 V_ADD_F32_e64 v8, v4, 1.000000e+00, 0, 0, 0, 0 ; D2060008 0201E504 V_ADD_F32_e64 v7, v6, 1.000000e+00, 0, 0, 0, 0 ; D2060007 0201E506 IMAGE_SAMPLE v4, 1, -1, 0, 0, 0, 0, 0, 0, v[7:8], s[16:23], s[12:15] ; F0801100 00640407 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 V_CMP_EQ_F32_e64 s[36:37], v4, 1.000000e+00, 0, 0, 0, 0 ; D0040024 0201E504 V_CNDMASK_B32_e64 v4, 0, -1, s[36:37], 0, 0, 0, 0 ; D2000004 00918280 V_AND_B32_e32 v4, 1065353216, v4 ; 360808F2 V_MUL_F32_e32 v4, v5, v4 ; 10080905 V_MUL_F32_e32 v5, s8, v4 ; 100A0808 IMAGE_SAMPLE v6, 8, -1, 0, 0, 0, 0, 0, 0, v[7:8], s[28:35], s[24:27] ; F0801800 00C70607 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e64 v6, 0, v6, 0, 1, 0, 0 ; D2060806 02020C80 V_MUL_F32_e32 v12, v5, v6 ; 10180D05 V_MUL_F32_e32 v5, s9, v4 ; 100A0809 V_MUL_F32_e32 v13, v5, v6 ; 101A0D05 V_MUL_F32_e32 v4, s10, v4 ; 1008080A V_MUL_F32_e32 v14, v4, v6 ; 101C0D04 V_SUBREV_F32_e32 v4, v3, v2 ; 0A080503 V_SUB_F32_e64 v5, v0, v1, 0, 0, 0, 0 ; D2080005 02020300 V_MUL_F32_e32 v5, v5, v5 ; 100A0B05 V_MAD_F32 v4, v4, v4, v5, 0, 0, 0, 0 ; D2820004 04160904 V_XOR_B32_e32 v5, -2147483648, v4 ; 3A0A08FF 80000000 V_SUB_F32_e32 v3, v3, v2 ; 08060503 V_SUB_F32_e64 v6, v1, v0, 0, 0, 0, 0 ; D2080006 02020101 V_CVT_F32_I32_e32 v1, s2 ; 7E020A02 V_ADD_F32_e32 v1, -1.000000e+00, v1 ; 060202F3 V_RCP_F32_e32 v7, v1 ; 7E0E5501 V_MOV_B32_e32 v8, 0.000000e+00 ; 7E100280 S_MOV_B64 s[36:37], 0 ; BEA40480 V_MOV_B32_e32 v10, s2 ; 7E140202 V_MOV_B32_e32 v1, v14 ; 7E02030E V_MOV_B32_e32 v9, v13 ; 7E12030D V_MOV_B32_e32 v11, v12 ; 7E16030C V_CMP_GE_I32_e64 s[38:39], v8, v10, 0, 0, 0, 0 ; D10C0026 02021508 V_CNDMASK_B32_e64 v12, 0, -1, s[38:39], 0, 0, 0, 0 ; D200000C 00998280 V_CMP_EQ_I32_e64 s[38:39], v12, 0, 0, 0, 0, 0 ; D1040026 0201010C S_AND_SAVEEXEC_B64 s[38:39], s[38:39] ; BEA62426 S_XOR_B64 s[38:39], exec, s[38:39] ; 89A6267E S_CBRANCH_EXECZ BB0_8 ; BF880000 V_CVT_F32_I32_e32 v12, v8 ; 7E180B08 V_MUL_F32_e32 v12, v7, v12 ; 10181907 V_MUL_F32_e32 v12, v12, v12 ; 1018190C V_MUL_F32_e32 v13, v6, v12 ; 101A1906 V_MUL_F32_e32 v13, s5, v13 ; 101A1A05 V_MOV_B32_e32 v14, 7.000000e-01 ; 7E1C02FF 3F333333 V_MAD_F32 v13, v13, v14, v0, 0, 0, 0, 0 ; D282000D 04021D0D V_ADD_F32_e32 v13, v13, v13 ; 061A1B0D V_ADD_F32_e32 v16, 1.000000e+00, v13 ; 06201AF2 V_MUL_F32_e32 v12, v3, v12 ; 10181903 V_MUL_F32_e32 v12, s5, v12 ; 10181805 V_MAD_F32 v12, v12, v14, v2, 0, 0, 0, 0 ; D282000C 040A1D0C V_ADD_F32_e32 v12, v12, v12 ; 0618190C V_ADD_F32_e32 v15, 1.000000e+00, v12 ; 061E18F2 IMAGE_SAMPLE v12, 1, -1, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0801100 00640C0F S_WAITCNT vmcnt(0) ; BF8C0770 V_CMP_EQ_F32_e64 s[40:41], v12, 1.000000e+00, 0, 0, 0, 0 ; D0040028 0201E50C V_CNDMASK_B32_e64 v12, 0, -1, s[40:41], 0, 0, 0, 0 ; D200000C 00A18280 V_AND_B32_e32 v12, 1065353216, v12 ; 361818F2 V_RSQ_LEGACY_F32_e32 v13, v4 ; 7E1A5B04 V_MUL_F32_e32 v13, v4, v13 ; 101A1B04 V_CMP_GT_F32_e64 s[40:41], 0, v5, 0, 0, 0, 0 ; D0080028 02020A80 V_CNDMASK_B32_e64 v13, 0.000000e+00, v13, s[40:41], 0, 0, 0, 0 ; D200000D 00A21A80 V_ADD_F32_e32 v13, 1.000000e+02, v13 ; 061A1AFF 42C80000 V_LOG_F32_e32 v13, v13 ; 7E1A4F0D V_MUL_LEGACY_F32_e32 v13, 1.300000e+00, v13 ; 0E1A1AFF 3FA66666 V_EXP_F32_e32 v13, v13 ; 7E1A4B0D V_ADD_F32_e32 v13, 1.000000e+02, v13 ; 061A1AFF 42C80000 V_RCP_F32_e32 v13, v13 ; 7E1A550D V_MUL_F32_e32 v13, 1.000000e+01, v13 ; 101A1AFF 41200000 V_MUL_F32_e32 v14, v13, v12 ; 101C190D V_MUL_F32_e32 v12, s8, v14 ; 10181C08 IMAGE_SAMPLE v13, 8, -1, 0, 0, 0, 0, 0, 0, v[15:16], s[28:35], s[24:27] ; F0801800 00C70D0F S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e64 v15, 0, v13, 0, 1, 0, 0 ; D206080F 02021A80 V_MUL_F32_e32 v12, v12, v15 ; 10181F0C V_MAD_F32 v12, v12, s4, v11, 0, 0, 0, 0 ; D282000C 042C090C V_MUL_F32_e32 v13, s9, v14 ; 101A1C09 V_MUL_F32_e32 v13, v13, v15 ; 101A1F0D V_MAD_F32 v13, v13, s4, v9, 0, 0, 0, 0 ; D282000D 0424090D V_MUL_F32_e32 v14, s10, v14 ; 101C1C0A V_MUL_F32_e32 v14, v14, v15 ; 101C1F0E V_MAD_F32 v14, v14, s4, v1, 0, 0, 0, 0 ; D282000E 0404090E V_ADD_I32_e32 v8, 1, v8 ; 4A101081 S_OR_B64 exec, exec, s[38:39] ; 88FE267E S_OR_B64 s[36:37], s[38:39], s[36:37] ; 88A42426 S_ANDN2_B64 exec, exec, s[36:37] ; 8AFE247E S_CBRANCH_EXECNZ BB0_7 ; BF890000 S_OR_B64 exec, exec, s[36:37] ; 88FE247E V_CVT_F32_I32_e32 v0, s2 ; 7E000A02 V_RCP_F32_e32 v0, v0 ; 7E005500 V_MUL_F32_e32 v0, s3, v0 ; 10000003 V_MUL_F32_e32 v2, v11, v0 ; 1004010B V_MOV_B32_e32 v3, 1.000000e-03 ; 7E0602FF 3A83126F V_CMP_LE_F32_e64 s[2:3], v2, v3, 0, 0, 0, 0 ; D0060002 02020702 V_CMP_U_F32_e64 s[4:5], v2, v2, 0, 0, 0, 0 ; D0100004 02020502 S_OR_B64 s[2:3], s[2:3], s[4:5] ; 88820402 V_CNDMASK_B32_e64 v2, v2, v3, s[2:3], 0, 0, 0, 0 ; D2000002 000A0702 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 2.200000e+00, v2 ; 0E0404FF 400CCCCD V_EXP_F32_e32 v8, v2 ; 7E104B02 V_MUL_F32_e32 v2, v9, v0 ; 10040109 V_CMP_LE_F32_e64 s[2:3], v2, v3, 0, 0, 0, 0 ; D0060002 02020702 V_CMP_U_F32_e64 s[4:5], v2, v2, 0, 0, 0, 0 ; D0100004 02020502 S_OR_B64 s[2:3], s[2:3], s[4:5] ; 88820402 V_CNDMASK_B32_e64 v2, v2, v3, s[2:3], 0, 0, 0, 0 ; D2000002 000A0702 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 2.200000e+00, v2 ; 0E0404FF 400CCCCD V_EXP_F32_e32 v9, v2 ; 7E124B02 V_MUL_F32_e32 v0, v1, v0 ; 10000101 V_CMP_LE_F32_e64 s[2:3], v0, v3, 0, 0, 0, 0 ; D0060002 02020700 V_CMP_U_F32_e64 s[4:5], v0, v0, 0, 0, 0, 0 ; D0100004 02020100 S_OR_B64 s[2:3], s[2:3], s[4:5] ; 88820402 V_CNDMASK_B32_e64 v0, v0, v3, s[2:3], 0, 0, 0, 0 ; D2000000 000A0700 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_LEGACY_F32_e32 v0, 2.200000e+00, v0 ; 0E0000FF 400CCCCD V_EXP_F32_e32 v10, v0 ; 7E144B00 S_OR_B64 exec, exec, s[0:1] ; 88FE007E S_OR_B64 exec, exec, s[6:7] ; 88FE067E V_CVT_PKRTZ_F16_F32_e64 v0, v10, v9, 0, 0, 0, 0 ; D25E0000 0202130A V_CVT_PKRTZ_F16_F32_e64 v1, v8, 0.000000e+00, 0, 0, 0, 0 ; D25E0001 02010108 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[1][0..96] DCL CONST[2][0..68] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v4 ; F80008CF 04020100 S_ENDPGM ; BF810000 FRAG 0: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) ret void } declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32) attributes #0 = { "ShaderType"="0" } Shader Disassembly: V_MOV_B32_e32 v0, 0 ; 7E000280 EXP 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[2] DCL TEMP[0] DCL TEMP[1..2], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[2].xxxx, CONST[2].yyyy 2: MOV TEMP[1].xy, TEMP[0].xyyy 3: TEX TEMP[1].xyz, TEMP[1], SAMP[1], RECT 4: MUL TEMP[2].xy, TEMP[0].xyyy, IMM[0].xxxx 5: MOV TEMP[2].xy, TEMP[2].xyyy 6: TEX TEMP[2].xyz, TEMP[2], SAMP[0], RECT 7: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz 8: MOV OUT[0], TEMP[1] 9: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %27 = load <32 x i8> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1 %31 = load <32 x i8> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = fmul float %15, %24 %35 = fadd float %34, %25 %36 = bitcast float %14 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %31, <16 x i8> %33, i32 5) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = fmul float %14, 5.000000e-01 %45 = fmul float %35, 5.000000e-01 %46 = bitcast float %44 to i32 %47 = bitcast float %45 to i32 %48 = insertelement <2 x i32> undef, i32 %46, i32 0 %49 = insertelement <2 x i32> %48, i32 %47, i32 1 %50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %27, <16 x i8> %29, i32 5) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = fadd float %41, %51 %55 = fadd float %42, %52 %56 = fadd float %43, %53 %57 = call i32 @llvm.SI.packf16(float %54, float %55) %58 = bitcast i32 %57 to float %59 = call i32 @llvm.SI.packf16(float %56, float 0.000000e+00) %60 = bitcast i32 %59 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %58, float %60, float %58, float %60) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_MOV_B32_e32 v0, v2 ; 7E000302 S_LOAD_DWORDX4 s[8:11], s[0:1], 0 ; C0840100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[8:11], 8 ; C2000908 S_BUFFER_LOAD_DWORD s1, s[8:11], 9 ; C2008909 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s1 ; 7E040201 V_MAD_F32 v1, v3, s0, v2, 0, 0, 0, 0 ; D2820001 04080103 S_LOAD_DWORDX4 s[8:11], s[2:3], 4 ; C0840304 S_LOAD_DWORDX8 s[12:19], s[4:5], 8 ; C0C60508 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[2:4], 7, -1, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[8:11] ; F0801700 00430200 V_MUL_F32_e32 v6, 5.000000e-01, v1 ; 100C02F0 V_MUL_F32_e32 v5, 5.000000e-01, v0 ; 100A00F0 S_LOAD_DWORDX4 s[0:3], s[2:3], 0 ; C0800300 S_LOAD_DWORDX8 s[4:11], s[4:5], 0 ; C0C20500 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 IMAGE_SAMPLE v[5:7], 7, -1, 0, 0, 0, 0, 0, 0, v[5:6], s[4:11], s[0:3] ; F0801700 00010505 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v0, v3, v6 ; 06000D03 V_ADD_F32_e32 v1, v2, v5 ; 06020B02 V_CVT_PKRTZ_F16_F32_e32 v0, v1, v0 ; 5E000101 V_ADD_F32_e32 v1, v4, v7 ; 06020F04 V_CVT_PKRTZ_F16_F32_e64 v1, v1, 0.000000e+00, 0, 0, 0, 0 ; D25E0001 02010101 EXP 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v4 ; F80008CF 04020100 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[2] DCL TEMP[0] DCL TEMP[1..4], LOCAL IMM[0] UINT32 {0, 0, 0, 0} IMM[1] FLT32 { 0.0001, 0.0010, 2.2000, 1.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[2].xxxx, CONST[2].yyyy 2: MOV TEMP[1].x, TEMP[0].xxxx 3: TXQ TEMP[2].y, IMM[0].xxxx, SAMP[0], RECT 4: I2F TEMP[2].x, TEMP[2].yyyy 5: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[0].yyyy 6: MOV TEMP[1].y, TEMP[2].xxxx 7: MOV TEMP[1].xy, TEMP[1].xyyy 8: TEX TEMP[1], TEMP[1], SAMP[0], RECT 9: ADD TEMP[3].x, TEMP[1].wwww, IMM[1].xxxx 10: RCP TEMP[3].x, TEMP[3].xxxx 11: MUL TEMP[2].xyz, TEMP[1].zyxx, TEMP[3].xxxx 12: MAX TEMP[3].xyz, IMM[1].yyyy, TEMP[2].xyzz 13: POW TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz 14: POW TEMP[4].y, TEMP[3].yyyy, IMM[1].zzzz 15: POW TEMP[4].z, TEMP[3].zzzz, IMM[1].zzzz 16: MOV TEMP[3].xy, TEMP[0].xyyy 17: TEX TEMP[3].xyz, TEMP[3], SAMP[1], RECT 18: ADD TEMP[1].x, IMM[1].wwww, -TEMP[1].wwww 19: LRP TEMP[2].xyz, TEMP[1].xxxx, TEMP[3].xyzz, TEMP[4].xyzz 20: MOV TEMP[1].xyz, TEMP[2].xyzx 21: MOV OUT[0], TEMP[1] 22: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %26 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %27 = load <32 x i8> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1 %31 = load <32 x i8> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = fmul float %15, %24 %35 = fadd float %34, %25 %36 = call <4 x i32> @llvm.SI.resinfo(i32 0, <32 x i8> %27, i32 5) %37 = extractelement <4 x i32> %36, i32 1 %38 = bitcast i32 %37 to float %39 = bitcast float %38 to i32 %40 = sitofp i32 %39 to float %41 = fsub float -0.000000e+00, %35 %42 = fadd float %40, %41 %43 = bitcast float %14 to i32 %44 = bitcast float %42 to i32 %45 = insertelement <2 x i32> undef, i32 %43, i32 0 %46 = insertelement <2 x i32> %45, i32 %44, i32 1 %47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %27, <16 x i8> %29, i32 5) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = fadd float %51, 0x3F1A36E2E0000000 %53 = fdiv float 1.000000e+00, %52 %54 = fmul float %50, %53 %55 = fmul float %49, %53 %56 = fmul float %48, %53 %57 = fcmp uge float 0x3F50624DE0000000, %54 %58 = select i1 %57, float 0x3F50624DE0000000, float %54 %59 = fcmp uge float 0x3F50624DE0000000, %55 %60 = select i1 %59, float 0x3F50624DE0000000, float %55 %61 = fcmp uge float 0x3F50624DE0000000, %56 %62 = select i1 %61, float 0x3F50624DE0000000, float %56 %63 = call float @llvm.pow.f32(float %58, float 0x40019999A0000000) %64 = call float @llvm.pow.f32(float %60, float 0x40019999A0000000) %65 = call float @llvm.pow.f32(float %62, float 0x40019999A0000000) %66 = bitcast float %14 to i32 %67 = bitcast float %35 to i32 %68 = insertelement <2 x i32> undef, i32 %66, i32 0 %69 = insertelement <2 x i32> %68, i32 %67, i32 1 %70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %31, <16 x i8> %33, i32 5) %71 = extractelement <4 x float> %70, i32 0 %72 = extractelement <4 x float> %70, i32 1 %73 = extractelement <4 x float> %70, i32 2 %74 = fsub float -0.000000e+00, %51 %75 = fadd float 1.000000e+00, %74 %76 = call float @llvm.AMDGPU.lrp(float %75, float %71, float %63) %77 = call float @llvm.AMDGPU.lrp(float %75, float %72, float %64) %78 = call float @llvm.AMDGPU.lrp(float %75, float %73, float %65) %79 = call i32 @llvm.SI.packf16(float %76, float %77) %80 = bitcast i32 %79 to float %81 = call i32 @llvm.SI.packf16(float %78, float %51) %82 = bitcast i32 %81 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %80, float %82, float %80, float %82) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: readnone declare <4 x i32> @llvm.SI.resinfo(i32, <32 x i8>, i32) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: V_MOV_B32_e32 v0, v2 ; 7E000302 S_LOAD_DWORDX4 s[8:11], s[0:1], 0 ; C0840100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[8:11], 8 ; C2000908 S_BUFFER_LOAD_DWORD s1, s[8:11], 9 ; C2008909 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v2, s1 ; 7E040201 V_MAD_F32 v2, v3, s0, v2, 0, 0, 0, 0 ; D2820002 04080103 S_LOAD_DWORDX8 s[8:15], s[4:5], 0 ; C0C40500 V_MOV_B32_e32 v3, 0 ; 7E060280 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_GET_RESINFO v3, 2, 0, 0, 0, 0, 0, 0, 0, v3, s[8:15] ; F0380200 00020303 S_WAITCNT vmcnt(0) ; BF8C0770 V_CVT_F32_I32_e32 v3, v3 ; 7E060B03 V_SUB_F32_e32 v1, v3, v2 ; 08020503 S_LOAD_DWORDX4 s[16:19], s[2:3], 0 ; C0880300 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[3:6], 15, -1, 0, 0, 0, 0, 0, 0, v[0:1], s[8:15], s[16:19] ; F0801F00 00820300 V_MOV_B32_e32 v7, 1.000000e-04 ; 7E0E02FF 38D1B717 S_WAITCNT vmcnt(0) ; BF8C0770 V_ADD_F32_e32 v7, v6, v7 ; 060E0F06 V_RCP_F32_e32 v7, v7 ; 7E0E5507 V_MUL_F32_e32 v8, v4, v7 ; 10100F04 V_MOV_B32_e32 v9, 1.000000e-03 ; 7E1202FF 3A83126F V_CMP_LE_F32_e64 s[0:1], v8, v9, 0, 0, 0, 0 ; D0060000 02021308 V_CMP_U_F32_e64 s[6:7], v8, v8, 0, 0, 0, 0 ; D0100006 02021108 S_OR_B64 s[0:1], s[0:1], s[6:7] ; 88800600 V_CNDMASK_B32_e64 v8, v8, v9, s[0:1], 0, 0, 0, 0 ; D2000008 00021308 V_LOG_F32_e32 v8, v8 ; 7E104F08 V_MUL_LEGACY_F32_e32 v8, 2.200000e+00, v8 ; 0E1010FF 400CCCCD V_EXP_F32_e32 v8, v8 ; 7E104B08 V_SUB_F32_e32 v10, 1.000000e+00, v6 ; 08140CF2 V_SUB_F32_e32 v11, 1.000000e+00, v10 ; 081614F2 V_MUL_F32_e32 v8, v11, v8 ; 1010110B V_MOV_B32_e32 v1, v2 ; 7E020302 S_LOAD_DWORDX4 s[0:3], s[2:3], 4 ; C0800304 S_LOAD_DWORDX8 s[4:11], s[4:5], 8 ; C0C20508 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:2], 7, -1, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[0:3] ; F0801700 00010000 S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v8, v10, v1, v8, 0, 0, 0, 0 ; D2820008 0422030A V_MUL_F32_e32 v12, v5, v7 ; 10180F05 V_CMP_LE_F32_e64 s[0:1], v12, v9, 0, 0, 0, 0 ; D0060000 0202130C V_CMP_U_F32_e64 s[2:3], v12, v12, 0, 0, 0, 0 ; D0100002 0202190C S_OR_B64 s[0:1], s[0:1], s[2:3] ; 88800200 V_CNDMASK_B32_e64 v12, v12, v9, s[0:1], 0, 0, 0, 0 ; D200000C 0002130C V_LOG_F32_e32 v12, v12 ; 7E184F0C V_MUL_LEGACY_F32_e32 v12, 2.200000e+00, v12 ; 0E1818FF 400CCCCD V_EXP_F32_e32 v12, v12 ; 7E184B0C V_MUL_F32_e32 v12, v11, v12 ; 1018190B V_MAD_F32 v12, v10, v0, v12, 0, 0, 0, 0 ; D282000C 0432010A V_CVT_PKRTZ_F16_F32_e32 v8, v12, v8 ; 5E10110C V_MUL_F32_e32 v7, v3, v7 ; 100E0F03 V_CMP_LE_F32_e64 s[0:1], v7, v9, 0, 0, 0, 0 ; D0060000 02021307 V_CMP_U_F32_e64 s[2:3], v7, v7, 0, 0, 0, 0 ; D0100002 02020F07 S_OR_B64 s[0:1], s[0:1], s[2:3] ; 88800200 V_CNDMASK_B32_e64 v7, v7, v9, s[0:1], 0, 0, 0, 0 ; D2000007 00021307 V_LOG_F32_e32 v7, v7 ; 7E0E4F07 V_MUL_LEGACY_F32_e32 v7, 2.200000e+00, v7 ; 0E0E0EFF 400CCCCD V_EXP_F32_e32 v7, v7 ; 7E0E4B07 V_MUL_F32_e32 v7, v11, v7 ; 100E0F0B V_MAD_F32 v0, v10, v2, v7, 0, 0, 0, 0 ; D2820000 041E050A V_CVT_PKRTZ_F16_F32_e32 v0, v0, v6 ; 5E000D00 EXP 15, 0, 1, 1, 1, v8, v0, v8, v0 ; F8001C0F 00080008 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v4 ; F80008CF 04020100 S_ENDPGM ; BF810000 FRAG DCL IN[0], POSITION, LINEAR DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[2] DCL CONST[0] DCL TEMP[0] DCL TEMP[1..3], LOCAL IMM[0] FLT32 { 1.0000, 0.0010, 0.4545, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[2].xxxx, CONST[2].yyyy 2: MOV TEMP[1].w, IMM[0].xxxx 3: MUL TEMP[2].xy, TEMP[0].xyyy, CONST[0].xyyy 4: MOV TEMP[2].xy, TEMP[2].xyyy 5: TEX TEMP[2].xyz, TEMP[2], SAMP[0], RECT 6: MAX TEMP[2].xyz, IMM[0].yyyy, TEMP[2].xyzz 7: POW TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 8: POW TEMP[3].y, TEMP[2].yyyy, IMM[0].zzzz 9: POW TEMP[3].z, TEMP[2].zzzz, IMM[0].zzzz 10: MOV TEMP[1].xyz, TEMP[3].xyzx 11: MOV OUT[0], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %29 = load <32 x i8> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = fmul float %15, %26 %33 = fadd float %32, %27 %34 = fmul float %14, %24 %35 = fmul float %33, %25 %36 = bitcast float %34 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %29, <16 x i8> %31, i32 5) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = fcmp uge float 0x3F50624DE0000000, %41 %45 = select i1 %44, float 0x3F50624DE0000000, float %41 %46 = fcmp uge float 0x3F50624DE0000000, %42 %47 = select i1 %46, float 0x3F50624DE0000000, float %42 %48 = fcmp uge float 0x3F50624DE0000000, %43 %49 = select i1 %48, float 0x3F50624DE0000000, float %43 %50 = call float @llvm.pow.f32(float %45, float 0x3FDD1745C0000000) %51 = call float @llvm.pow.f32(float %47, float 0x3FDD1745C0000000) %52 = call float @llvm.pow.f32(float %49, float 0x3FDD1745C0000000) %53 = call i32 @llvm.SI.packf16(float %50, float %51) %54 = bitcast i32 %53 to float %55 = call i32 @llvm.SI.packf16(float %52, float 1.000000e+00) %56 = bitcast i32 %55 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %54, float %56, float %54, float %56) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[8:11], s[0:1], 0 ; C0840100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[8:11], 8 ; C2000908 S_BUFFER_LOAD_DWORD s1, s[8:11], 9 ; C2008909 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v0, s1 ; 7E000201 V_MAD_F32 v0, v3, s0, v0, 0, 0, 0, 0 ; D2820000 04000103 S_BUFFER_LOAD_DWORD s0, s[8:11], 1 ; C2000901 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s0, v0 ; 10020000 S_BUFFER_LOAD_DWORD s0, s[8:11], 0 ; C2000900 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s0, v2 ; 10000400 S_LOAD_DWORDX4 s[0:3], s[2:3], 0 ; C0800300 S_LOAD_DWORDX8 s[4:11], s[4:5], 0 ; C0C20500 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[0:2], 7, -1, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[0:3] ; F0801700 00010000 S_WAITCNT vmcnt(0) ; BF8C0770 V_CMP_U_F32_e64 s[0:1], v1, v1, 0, 0, 0, 0 ; D0100000 02020301 V_MOV_B32_e32 v3, 1.000000e-03 ; 7E0602FF 3A83126F V_CMP_LE_F32_e64 s[2:3], v1, v3, 0, 0, 0, 0 ; D0060002 02020701 S_OR_B64 s[0:1], s[2:3], s[0:1] ; 88800002 V_CNDMASK_B32_e64 v4, v1, v3, s[0:1], 0, 0, 0, 0 ; D2000004 00020701 V_LOG_F32_e32 v4, v4 ; 7E084F04 V_MUL_LEGACY_F32_e32 v4, 4.545454e-01, v4 ; 0E0808FF 3EE8BA2E V_EXP_F32_e32 v4, v4 ; 7E084B04 V_CMP_U_F32_e64 s[0:1], v0, v0, 0, 0, 0, 0 ; D0100000 02020100 V_CMP_LE_F32_e64 s[2:3], v0, v3, 0, 0, 0, 0 ; D0060002 02020700 S_OR_B64 s[0:1], s[2:3], s[0:1] ; 88800002 V_CNDMASK_B32_e64 v5, v0, v3, s[0:1], 0, 0, 0, 0 ; D2000005 00020700 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_LEGACY_F32_e32 v5, 4.545454e-01, v5 ; 0E0A0AFF 3EE8BA2E V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_CVT_PKRTZ_F16_F32_e32 v4, v5, v4 ; 5E080905 V_CMP_U_F32_e64 s[0:1], v2, v2, 0, 0, 0, 0 ; D0100000 02020502 V_CMP_LE_F32_e64 s[2:3], v2, v3, 0, 0, 0, 0 ; D0060002 02020702 S_OR_B64 s[0:1], s[2:3], s[0:1] ; 88800002 V_CNDMASK_B32_e64 v0, v2, v3, s[0:1], 0, 0, 0, 0 ; D2000000 00020702 V_LOG_F32_e32 v0, v0 ; 7E004F00 V_MUL_LEGACY_F32_e32 v0, 4.545454e-01, v0 ; 0E0000FF 3EE8BA2E V_EXP_F32_e32 v0, v0 ; 7E004B00 V_CVT_PKRTZ_F16_F32_e64 v0, v0, 1.000000e+00, 0, 0, 0, 0 ; D25E0000 0201E500 EXP 15, 0, 1, 1, 1, v4, v0, v4, v0 ; F8001C0F 00040004 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 12, 0, 1, 0, v0, v1, v2, v4 ; F80008CF 04020100 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[20], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..1] DCL CONST[3..16] DCL CONST[1][0..96] DCL CONST[2][0..68] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.5000, 2.0000, -1.0000, -1523613696.0000} IMM[1] FLT32 { -0.0000, 400000.0000, 0.0000, 177.5000} IMM[2] FLT32 { 0.0000, 0.4123, 0.3579, 0.7000} IMM[3] FLT32 { 0.0003, 0.1000, 0.7341, 0.4325} IMM[4] FLT32 { 0.9000, 0.0020, 0.2141, 0.9246} IMM[5] FLT32 { 0.3000, 0.4000, 3.0000, 1.4000} IMM[6] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0].xyyy, IMM[0].xxxx, IMM[0].xxxx 1: ADD TEMP[0].xy, CONST[6].xyyy, TEMP[0].xyyy 2: RCP TEMP[1].x, CONST[7].xxxx 3: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xxxx 4: MAD TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy, IMM[0].zzzz 5: MAD TEMP[1].xyz, CONST[4].xyzz, TEMP[0].xxxx, CONST[3].xyzz 6: MAD TEMP[0].xyz, CONST[5].xyzz, TEMP[0].yyyy, TEMP[1].xyzz 7: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 10: DP3 TEMP[1].x, TEMP[0].xyzz, IMM[1].xyxx 11: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 12: MUL TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww 13: MAD TEMP[2].x, TEMP[1].xxxx, TEMP[1].xxxx, -TEMP[2].xxxx 14: RSQ TEMP[3].x, TEMP[2].xxxx 15: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[2].xxxx 16: CMP TEMP[3].x, -TEMP[2].xxxx, TEMP[3].xxxx, IMM[1].zzzz 17: ADD TEMP[1].x, -TEMP[1].xxxx, TEMP[3].xxxx 18: MUL TEMP[1].xz, TEMP[1].xxxx, TEMP[0].xyzz 19: MUL TEMP[2].x, IMM[1].wwww, CONST[15].xxxx 20: MUL TEMP[2].xy, CONST[14].xyyy, TEMP[2].xxxx 21: MAD TEMP[3].xy, TEMP[1].xzzz, IMM[2].xxxx, IMM[2].yzzz 22: MAD TEMP[3].xy, TEMP[2].xyyy, IMM[2].xxxx, TEMP[3].xyyy 23: MOV TEMP[3].xy, TEMP[3].xyyy 24: TEX TEMP[3].xyz, TEMP[3], SAMP[0], 2D 25: ADD TEMP[4].x, IMM[0].xxxx, CONST[16].xxxx 26: ADD TEMP[5].x, TEMP[3].xxxx, -TEMP[4].xxxx 27: ADD TEMP[6].x, IMM[2].wwww, CONST[16].xxxx 28: ADD TEMP[4].x, TEMP[6].xxxx, -TEMP[4].xxxx 29: RCP TEMP[4].x, TEMP[4].xxxx 30: MUL_SAT TEMP[4].x, TEMP[5].xxxx, TEMP[4].xxxx 31: MUL TEMP[3].xy, TEMP[3].yzzz, IMM[3].yyyy 32: MAD TEMP[3].xy, TEMP[1].xzzz, IMM[3].xxxx, TEMP[3].xyyy 33: ADD TEMP[3].xy, TEMP[3].xyyy, IMM[3].zwww 34: MAD TEMP[3].xy, TEMP[2].xyyy, IMM[3].xxxx, TEMP[3].xyyy 35: MOV TEMP[3].xy, TEMP[3].xyyy 36: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 37: MAD TEMP[1].xy, TEMP[1].xzzz, IMM[4].yyyy, IMM[4].zwww 38: MAD TEMP[1].xy, TEMP[2].xyyy, IMM[4].yyyy, TEMP[1].xyyy 39: MOV TEMP[1].xy, TEMP[1].xyyy 40: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 41: MAD TEMP[1].x, TEMP[1].xxxx, IMM[5].xxxx, IMM[5].yyyy 42: MAD TEMP[1].x, TEMP[3].xxxx, IMM[4].xxxx, TEMP[1].xxxx 43: MUL TEMP[2].x, IMM[0].yyyy, TEMP[4].xxxx 44: ADD TEMP[2].x, IMM[5].zzzz, -TEMP[2].xxxx 45: MUL TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx 46: MUL TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx 47: MUL_SAT TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 48: POW TEMP[1].x, TEMP[1].xxxx, IMM[4].xxxx 49: DP3_SAT TEMP[2].x, TEMP[0].xyzz, CONST[8].xyzz 50: POW TEMP[2].x, TEMP[2].xxxx, CONST[10].xxxx 51: MOV_SAT TEMP[2].x, TEMP[2].xxxx 52: MUL TEMP[2].xyz, CONST[9].xyzz, TEMP[2].xxxx 53: ADD TEMP[3].x, IMM[5].wwww, -TEMP[1].xxxx 54: ADD_SAT TEMP[0].x, IMM[6].xxxx, -TEMP[0].yyyy 55: POW TEMP[0].x, TEMP[0].xxxx, IMM[5].zzzz 56: LRP TEMP[0].xyz, TEMP[0].xxxx, CONST[12].xyzz, CONST[11].xyzz 57: LRP TEMP[0].xyz, TEMP[1].xxxx, CONST[13].xyzz, TEMP[0].xyzz 58: MAD TEMP[0].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[0].xyzz 59: MOV TEMP[0].w, TEMP[1].xxxx 60: MOV OUT[0], TEMP[0] 61: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %56 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %57 = load <32 x i8> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %59 = load <16 x i8> addrspace(2)* %58, !tbaa !0 %60 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %61 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %62 = fmul float %60, 5.000000e-01 %63 = fadd float %62, 5.000000e-01 %64 = fmul float %61, 5.000000e-01 %65 = fadd float %64, 5.000000e-01 %66 = fadd float %33, %63 %67 = fadd float %34, %65 %68 = fdiv float 1.000000e+00, %35 %69 = fmul float %66, %68 %70 = fmul float %67, %68 %71 = fmul float %69, 2.000000e+00 %72 = fadd float %71, -1.000000e+00 %73 = fmul float %70, 2.000000e+00 %74 = fadd float %73, -1.000000e+00 %75 = fmul float %27, %72 %76 = fadd float %75, %24 %77 = fmul float %28, %72 %78 = fadd float %77, %25 %79 = fmul float %29, %72 %80 = fadd float %79, %26 %81 = fmul float %30, %74 %82 = fadd float %81, %76 %83 = fmul float %31, %74 %84 = fadd float %83, %78 %85 = fmul float %32, %74 %86 = fadd float %85, %80 %87 = fmul float %82, %82 %88 = fmul float %84, %84 %89 = fadd float %88, %87 %90 = fmul float %86, %86 %91 = fadd float %89, %90 %92 = call float @llvm.AMDGPU.rsq(float %91) %93 = fmul float %82, %92 %94 = fmul float %84, %92 %95 = fmul float %86, %92 %96 = fmul float %93, -0.000000e+00 %97 = fmul float %94, 4.000000e+05 %98 = fadd float %97, %96 %99 = fmul float %95, -0.000000e+00 %100 = fadd float %98, %99 %101 = fmul float %93, %93 %102 = fmul float %94, %94 %103 = fadd float %102, %101 %104 = fmul float %95, %95 %105 = fadd float %103, %104 %106 = fmul float %105, 0xC1D6B42000000000 %107 = fsub float -0.000000e+00, %106 %108 = fmul float %100, %100 %109 = fadd float %108, %107 %110 = call float @llvm.AMDGPU.rsq(float %109) %111 = fmul float %110, %109 %112 = fsub float -0.000000e+00, %109 %113 = call float @llvm.AMDGPU.cndlt(float %112, float %111, float 0.000000e+00) %114 = fsub float -0.000000e+00, %100 %115 = fadd float %114, %113 %116 = fmul float %115, %93 %117 = fmul float %115, %95 %118 = fmul float 1.775000e+02, %54 %119 = fmul float %52, %118 %120 = fmul float %53, %118 %121 = fmul float %116, 0x3F06AA7440000000 %122 = fadd float %121, 0x3FDA635620000000 %123 = fmul float %117, 0x3F06AA7440000000 %124 = fadd float %123, 0x3FD6E78680000000 %125 = fmul float %119, 0x3F06AA7440000000 %126 = fadd float %125, %122 %127 = fmul float %120, 0x3F06AA7440000000 %128 = fadd float %127, %124 %129 = bitcast float %126 to i32 %130 = bitcast float %128 to i32 %131 = insertelement <2 x i32> undef, i32 %129, i32 0 %132 = insertelement <2 x i32> %131, i32 %130, i32 1 %133 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %132, <32 x i8> %57, <16 x i8> %59, i32 2) %134 = extractelement <4 x float> %133, i32 0 %135 = extractelement <4 x float> %133, i32 1 %136 = extractelement <4 x float> %133, i32 2 %137 = fadd float 5.000000e-01, %55 %138 = fsub float -0.000000e+00, %137 %139 = fadd float %134, %138 %140 = fadd float 0x3FE6666660000000, %55 %141 = fsub float -0.000000e+00, %137 %142 = fadd float %140, %141 %143 = fdiv float 1.000000e+00, %142 %144 = fmul float %139, %143 %145 = call float @llvm.AMDIL.clamp.(float %144, float 0.000000e+00, float 1.000000e+00) %146 = fmul float %135, 0x3FB99999A0000000 %147 = fmul float %136, 0x3FB99999A0000000 %148 = fmul float %116, 0x3F34BD3EE0000000 %149 = fadd float %148, %146 %150 = fmul float %117, 0x3F34BD3EE0000000 %151 = fadd float %150, %147 %152 = fadd float %149, 0x3FE77E1540000000 %153 = fadd float %151, 0x3FDBAE6EA0000000 %154 = fmul float %119, 0x3F34BD3EE0000000 %155 = fadd float %154, %152 %156 = fmul float %120, 0x3F34BD3EE0000000 %157 = fadd float %156, %153 %158 = bitcast float %155 to i32 %159 = bitcast float %157 to i32 %160 = insertelement <2 x i32> undef, i32 %158, i32 0 %161 = insertelement <2 x i32> %160, i32 %159, i32 1 %162 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %161, <32 x i8> %57, <16 x i8> %59, i32 2) %163 = extractelement <4 x float> %162, i32 0 %164 = fmul float %116, 0x3F60624DE0000000 %165 = fadd float %164, 0x3FCB686A40000000 %166 = fmul float %117, 0x3F60624DE0000000 %167 = fadd float %166, 0x3FED966CC0000000 %168 = fmul float %119, 0x3F60624DE0000000 %169 = fadd float %168, %165 %170 = fmul float %120, 0x3F60624DE0000000 %171 = fadd float %170, %167 %172 = bitcast float %169 to i32 %173 = bitcast float %171 to i32 %174 = insertelement <2 x i32> undef, i32 %172, i32 0 %175 = insertelement <2 x i32> %174, i32 %173, i32 1 %176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %175, <32 x i8> %57, <16 x i8> %59, i32 2) %177 = extractelement <4 x float> %176, i32 0 %178 = fmul float %177, 0x3FD3333340000000 %179 = fadd float %178, 0x3FD99999A0000000 %180 = fmul float %163, 0x3FECCCCCC0000000 %181 = fadd float %180, %179 %182 = fmul float 2.000000e+00, %145 %183 = fsub float -0.000000e+00, %182 %184 = fadd float 3.000000e+00, %183 %185 = fmul float %145, %184 %186 = fmul float %145, %185 %187 = fmul float %181, %186 %188 = call float @llvm.AMDIL.clamp.(float %187, float 0.000000e+00, float 1.000000e+00) %189 = call float @llvm.pow.f32(float %188, float 0x3FECCCCCC0000000) %190 = fmul float %93, %36 %191 = fmul float %94, %37 %192 = fadd float %191, %190 %193 = fmul float %95, %38 %194 = fadd float %192, %193 %195 = call float @llvm.AMDIL.clamp.(float %194, float 0.000000e+00, float 1.000000e+00) %196 = call float @llvm.pow.f32(float %195, float %42) %197 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00) %198 = fmul float %39, %197 %199 = fmul float %40, %197 %200 = fmul float %41, %197 %201 = fsub float -0.000000e+00, %189 %202 = fadd float 0x3FF6666660000000, %201 %203 = fsub float -0.000000e+00, %94 %204 = fadd float 1.000000e+00, %203 %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00) %206 = call float @llvm.pow.f32(float %205, float 3.000000e+00) %207 = call float @llvm.AMDGPU.lrp(float %206, float %46, float %43) %208 = call float @llvm.AMDGPU.lrp(float %206, float %47, float %44) %209 = call float @llvm.AMDGPU.lrp(float %206, float %48, float %45) %210 = call float @llvm.AMDGPU.lrp(float %189, float %49, float %207) %211 = call float @llvm.AMDGPU.lrp(float %189, float %50, float %208) %212 = call float @llvm.AMDGPU.lrp(float %189, float %51, float %209) %213 = fmul float %198, %202 %214 = fadd float %213, %210 %215 = fmul float %199, %202 %216 = fadd float %215, %211 %217 = fmul float %200, %202 %218 = fadd float %217, %212 %219 = call i32 @llvm.SI.packf16(float %214, float %216) %220 = bitcast i32 %219 to float %221 = call i32 @llvm.SI.packf16(float %218, float %189) %222 = bitcast i32 %221 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %220, float %222, float %220, float %222) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readonly } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_WQM_B64 exec, exec ; BEFE0A7E S_MOV_B32 m0, s9 ; BEFC0309 V_INTERP_P1_F32 v2, v0, 0, 0, [m0] ; C8080000 V_INTERP_P2_F32 v2, [v2], v1, 0, 0, [m0] ; C8090001 V_MAD_F32 v2, v2, 5.000000e-01, 5.000000e-01, 0, 0, 0, 0 ; D2820002 03C1E102 S_LOAD_DWORDX4 s[12:15], s[0:1], 0 ; C0860100 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s0, s[12:15], 24 ; C2000D18 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v2, s0, v2 ; 06040400 S_BUFFER_LOAD_DWORD s0, s[12:15], 28 ; C2000D1C S_WAITCNT lgkmcnt(0) ; BF8C007F V_RCP_F32_e32 v3, s0 ; 7E065400 V_MUL_F32_e32 v4, v2, v3 ; 10080702 V_MAD_F32 v2, v2, v3, v4, 0, 0, 0, 0 ; D2820002 04120702 V_ADD_F32_e32 v2, -1.000000e+00, v2 ; 060404F3 S_BUFFER_LOAD_DWORD s0, s[12:15], 17 ; C2000D11 S_BUFFER_LOAD_DWORD s1, s[12:15], 13 ; C2008D0D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v4, s1 ; 7E080201 V_MAD_F32 v4, s0, v2, v4, 0, 0, 0, 0 ; D2820004 04120400 V_INTERP_P1_F32 v5, v0, 1, 0, [m0] ; C8140100 V_INTERP_P2_F32 v5, [v5], v1, 1, 0, [m0] ; C8150101 V_MAD_F32 v0, v5, 5.000000e-01, 5.000000e-01, 0, 0, 0, 0 ; D2820000 03C1E105 S_BUFFER_LOAD_DWORD s0, s[12:15], 25 ; C2000D19 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s0, v0 ; 06000000 V_MUL_F32_e32 v1, v0, v3 ; 10020700 V_MAD_F32 v0, v0, v3, v1, 0, 0, 0, 0 ; D2820000 04060700 V_ADD_F32_e32 v0, -1.000000e+00, v0 ; 060000F3 S_BUFFER_LOAD_DWORD s0, s[12:15], 21 ; C2000D15 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, s0, v0, v4, 0, 0, 0, 0 ; D2820001 04120000 S_BUFFER_LOAD_DWORD s0, s[12:15], 16 ; C2000D10 S_BUFFER_LOAD_DWORD s1, s[12:15], 12 ; C2008D0C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v3, s1 ; 7E060201 V_MAD_F32 v3, s0, v2, v3, 0, 0, 0, 0 ; D2820003 040E0400 S_BUFFER_LOAD_DWORD s0, s[12:15], 20 ; C2000D14 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s0, v0, v3, 0, 0, 0, 0 ; D2820003 040E0000 V_MUL_F32_e32 v4, v3, v3 ; 10080703 V_MAD_F32 v4, v1, v1, v4, 0, 0, 0, 0 ; D2820004 04120301 S_BUFFER_LOAD_DWORD s0, s[12:15], 18 ; C2000D12 S_BUFFER_LOAD_DWORD s1, s[12:15], 14 ; C2008D0E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MOV_B32_e32 v5, s1 ; 7E0A0201 V_MAD_F32 v2, s0, v2, v5, 0, 0, 0, 0 ; D2820002 04160400 S_BUFFER_LOAD_DWORD s0, s[12:15], 22 ; C2000D16 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, s0, v0, v2, 0, 0, 0, 0 ; D2820000 040A0000 V_MAD_F32 v2, v0, v0, v4, 0, 0, 0, 0 ; D2820002 04120100 V_RSQ_LEGACY_F32_e32 v2, v2 ; 7E045B02 V_MUL_F32_e32 v1, v1, v2 ; 10020501 V_MUL_F32_e32 v3, v3, v2 ; 10060503 V_MUL_F32_e32 v4, -0.000000e+00, v3 ; 100806FF 80000000 V_MOV_B32_e32 v5, 4.000000e+05 ; 7E0A02FF 48C35000 V_MAD_F32 v4, v1, v5, v4, 0, 0, 0, 0 ; D2820004 04120B01 V_MUL_F32_e32 v0, v0, v2 ; 10000500 V_MOV_B32_e32 v2, -0.000000e+00 ; 7E0402FF 80000000 V_MAD_F32 v2, v0, v2, v4, 0, 0, 0, 0 ; D2820002 04120500 V_MUL_F32_e32 v4, v3, v3 ; 10080703 V_MAD_F32 v4, v1, v1, v4, 0, 0, 0, 0 ; D2820004 04120301 V_MAD_F32 v4, v0, v0, v4, 0, 0, 0, 0 ; D2820004 04120100 V_MUL_F32_e32 v4, 1.523614e+09, v4 ; 100808FF 4EB5A100 V_MAD_F32 v4, v2, v2, v4, 0, 0, 0, 0 ; D2820004 04120502 V_RSQ_LEGACY_F32_e32 v5, v4 ; 7E0A5B04 V_MUL_F32_e32 v5, v5, v4 ; 100A0905 V_XOR_B32_e32 v4, -2147483648, v4 ; 3A0808FF 80000000 V_CMP_GT_F32_e64 s[0:1], 0, v4, 0, 0, 0, 0 ; D0080000 02020880 V_CNDMASK_B32_e64 v4, 0.000000e+00, v5, s[0:1], 0, 0, 0, 0 ; D2000004 00020A80 V_SUB_F32_e32 v2, v4, v2 ; 08040504 V_MUL_F32_e32 v4, v2, v0 ; 10080102 V_MOV_B32_e32 v5, 3.578812e-01 ; 7E0A02FF 3EB73C34 V_MOV_B32_e32 v6, 4.323165e-05 ; 7E0C02FF 383553A2 V_MAD_F32 v5, v4, v6, v5, 0, 0, 0, 0 ; D2820005 04160D04 S_BUFFER_LOAD_DWORD s0, s[12:15], 60 ; C2000D3C V_MOV_B32_e32 v7, 1.775000e+02 ; 7E0E02FF 43318000 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s0, v7 ; 100E0E00 S_BUFFER_LOAD_DWORD s0, s[12:15], 57 ; C2000D39 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s0, v7 ; 10100E00 V_MAD_F32 v10, v8, v6, v5, 0, 0, 0, 0 ; D282000A 04160D08 V_MUL_F32_e32 v2, v2, v3 ; 10040702 V_MOV_B32_e32 v5, 4.123130e-01 ; 7E0A02FF 3ED31AB1 V_MAD_F32 v5, v2, v6, v5, 0, 0, 0, 0 ; D2820005 04160D02 S_BUFFER_LOAD_DWORD s0, s[12:15], 56 ; C2000D38 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s0, v7 ; 100E0E00 V_MAD_F32 v9, v7, v6, v5, 0, 0, 0, 0 ; D2820009 04160D07 S_LOAD_DWORDX4 s[0:3], s[2:3], 0 ; C0800300 S_LOAD_DWORDX8 s[4:11], s[4:5], 0 ; C0C20500 S_WAITCNT lgkmcnt(0) ; BF8C007F IMAGE_SAMPLE v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[4:11], s[0:3] ; F0800700 00010909 V_MOV_B32_e32 v5, 1.000000e-01 ; 7E0A02FF 3DCCCCCD S_WAITCNT vmcnt(0) ; BF8C0770 V_MUL_F32_e32 v6, v11, v5 ; 100C0B0B V_MOV_B32_e32 v12, 3.164557e-04 ; 7E1802FF 39A5E9F7 V_MAD_F32 v6, v4, v12, v6, 0, 0, 0, 0 ; D2820006 041A1904 V_ADD_F32_e32 v6, 4.325215e-01, v6 ; 060C0CFF 3EDD7375 V_MAD_F32 v14, v8, v12, v6, 0, 0, 0, 0 ; D282000E 041A1908 V_MUL_F32_e32 v5, v10, v5 ; 100A0B0A V_MAD_F32 v5, v2, v12, v5, 0, 0, 0, 0 ; D2820005 04161902 V_ADD_F32_e32 v5, 7.341410e-01, v5 ; 060A0AFF 3F3BF0AA V_MAD_F32 v13, v7, v12, v5, 0, 0, 0, 0 ; D282000D 04161907 IMAGE_SAMPLE v5, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[4:11], s[0:3] ; F0800100 0001050D V_MOV_B32_e32 v6, 9.246124e-01 ; 7E0C02FF 3F6CB366 V_MOV_B32_e32 v12, 2.000000e-03 ; 7E1802FF 3B03126F V_MAD_F32 v4, v4, v12, v6, 0, 0, 0, 0 ; D2820004 041A1904 V_MAD_F32 v14, v8, v12, v4, 0, 0, 0, 0 ; D282000E 04121908 V_MOV_B32_e32 v4, 2.141240e-01 ; 7E0802FF 3E5B4352 V_MAD_F32 v2, v2, v12, v4, 0, 0, 0, 0 ; D2820002 04121902 V_MAD_F32 v13, v7, v12, v2, 0, 0, 0, 0 ; D282000D 040A1907 IMAGE_SAMPLE v2, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[4:11], s[0:3] ; F0800100 0001020D V_MOV_B32_e32 v4, 4.000000e-01 ; 7E0802FF 3ECCCCCD V_MOV_B32_e32 v6, 3.000000e-01 ; 7E0C02FF 3E99999A S_WAITCNT vmcnt(0) ; BF8C0770 V_MAD_F32 v2, v2, v6, v4, 0, 0, 0, 0 ; D2820002 04120D02 V_MOV_B32_e32 v4, 9.000000e-01 ; 7E0802FF 3F666666 V_MAD_F32 v2, v5, v4, v2, 0, 0, 0, 0 ; D2820002 040A0905 S_BUFFER_LOAD_DWORD s0, s[12:15], 64 ; C2000D40 S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e64 v4, s0, 5.000000e-01, 0, 0, 0, 0 ; D2060004 0201E000 V_SUB_F32_e32 v5, v9, v4 ; 080A0909 V_MOV_B32_e32 v6, 7.000000e-01 ; 7E0C02FF 3F333333 V_ADD_F32_e32 v6, s0, v6 ; 060C0C00 V_SUB_F32_e32 v4, v6, v4 ; 08080906 V_RCP_F32_e32 v4, v4 ; 7E085504 V_MUL_F32_e32 v4, v5, v4 ; 10080905 V_ADD_F32_e64 v4, 0, v4, 0, 1, 0, 0 ; D2060804 02020880 V_ADD_F32_e32 v5, v4, v4 ; 060A0904 V_SUB_F32_e32 v5, 3.000000e+00, v5 ; 080A0AFF 40400000 V_MUL_F32_e32 v5, v4, v5 ; 100A0B04 V_MUL_F32_e32 v4, v4, v5 ; 10080B04 V_MUL_F32_e32 v2, v2, v4 ; 10040902 V_ADD_F32_e64 v2, 0, v2, 0, 1, 0, 0 ; D2060802 02020480 V_LOG_F32_e32 v2, v2 ; 7E044F02 V_MUL_LEGACY_F32_e32 v2, 9.000000e-01, v2 ; 0E0404FF 3F666666 V_EXP_F32_e32 v2, v2 ; 7E044B02 V_SUB_F32_e32 v4, 1.000000e+00, v2 ; 080804F2 V_SUB_F32_e32 v5, 1.000000e+00, v1 ; 080A02F2 V_ADD_F32_e64 v5, 0, v5, 0, 1, 0, 0 ; D2060805 02020A80 V_LOG_F32_e32 v5, v5 ; 7E0A4F05 V_MUL_LEGACY_F32_e32 v5, 3.000000e+00, v5 ; 0E0A0AFF 40400000 V_EXP_F32_e32 v5, v5 ; 7E0A4B05 V_SUB_F32_e32 v6, 1.000000e+00, v5 ; 080C0AF2 S_BUFFER_LOAD_DWORD s0, s[12:15], 45 ; C2000D2D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s0, v6 ; 100E0C00 S_BUFFER_LOAD_DWORD s0, s[12:15], 49 ; C2000D31 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v5, s0, v7, 0, 0, 0, 0 ; D2820007 041C0105 V_MUL_F32_e32 v7, v4, v7 ; 100E0F04 S_BUFFER_LOAD_DWORD s0, s[12:15], 53 ; C2000D35 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, v2, s0, v7, 0, 0, 0, 0 ; D2820007 041C0102 V_SUB_F32_e32 v8, 1.400000e+00, v2 ; 081004FF 3FB33333 S_BUFFER_LOAD_DWORD s0, s[12:15], 32 ; C2000D20 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s0, v3 ; 10060600 S_BUFFER_LOAD_DWORD s0, s[12:15], 33 ; C2000D21 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, v1, s0, v3, 0, 0, 0, 0 ; D2820001 040C0101 S_BUFFER_LOAD_DWORD s0, s[12:15], 34 ; C2000D22 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, v0, s0, v1, 0, 0, 0, 0 ; D2820000 04040100 V_ADD_F32_e64 v0, 0, v0, 0, 1, 0, 0 ; D2060800 02020080 V_LOG_F32_e32 v0, v0 ; 7E004F00 S_BUFFER_LOAD_DWORD s0, s[12:15], 40 ; C2000D28 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_LEGACY_F32_e32 v0, s0, v0 ; 0E000000 V_EXP_F32_e32 v0, v0 ; 7E004B00 V_ADD_F32_e64 v0, 0, v0, 0, 1, 0, 0 ; D2060800 02020080 S_BUFFER_LOAD_DWORD s0, s[12:15], 37 ; C2000D25 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v1, s0, v0 ; 10020000 V_MAD_F32 v1, v1, v8, v7, 0, 0, 0, 0 ; D2820001 041E1101 S_BUFFER_LOAD_DWORD s0, s[12:15], 44 ; C2000D2C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s0, v6 ; 10060C00 S_BUFFER_LOAD_DWORD s0, s[12:15], 48 ; C2000D30 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v5, s0, v3, 0, 0, 0, 0 ; D2820003 040C0105 V_MUL_F32_e32 v3, v4, v3 ; 10060704 S_BUFFER_LOAD_DWORD s0, s[12:15], 52 ; C2000D34 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v2, s0, v3, 0, 0, 0, 0 ; D2820003 040C0102 S_BUFFER_LOAD_DWORD s0, s[12:15], 36 ; C2000D24 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s0, v0 ; 100E0000 V_MAD_F32 v3, v7, v8, v3, 0, 0, 0, 0 ; D2820003 040E1107 V_CVT_PKRTZ_F16_F32_e32 v1, v3, v1 ; 5E020303 S_BUFFER_LOAD_DWORD s0, s[12:15], 46 ; C2000D2E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s0, v6 ; 10060C00 S_BUFFER_LOAD_DWORD s0, s[12:15], 50 ; C2000D32 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v5, s0, v3, 0, 0, 0, 0 ; D2820003 040C0105 V_MUL_F32_e32 v3, v4, v3 ; 10060704 S_BUFFER_LOAD_DWORD s0, s[12:15], 54 ; C2000D36 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, v2, s0, v3, 0, 0, 0, 0 ; D2820003 040C0102 S_BUFFER_LOAD_DWORD s0, s[12:15], 38 ; C2000D26 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v0, s0, v0 ; 10000000 V_MAD_F32 v0, v0, v8, v3, 0, 0, 0, 0 ; D2820000 040E1100 V_CVT_PKRTZ_F16_F32_e32 v0, v0, v2 ; 5E000500 EXP 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[20] DCL CONST[1][0..96] DCL CONST[2][0..68] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].w, IMM[0].xxxx 1: MOV TEMP[0].xyz, IN[0].xyzx 2: MOV TEMP[1].xy, IN[0].xyxx 3: MOV OUT[1], TEMP[1] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %11, i32 0, i32 %6) %13 = extractelement <4 x float> %12, i32 0 %14 = extractelement <4 x float> %12, i32 1 %15 = extractelement <4 x float> %12, i32 2 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %13, float %14, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %13, float %14, float %15, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[0:3], s[8:9], 0 ; C0800900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[0:3][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80000000 V_MOV_B32_e32 v4, 0.000000e+00 ; 7E080280 S_WAITCNT vmcnt(0) ; BF8C0770 EXP 15, 32, 0, 0, 0, v0, v1, v4, v4 ; F800020F 04040100 S_WAITCNT expcnt(0) ; BF8C070F V_MOV_B32_e32 v4, 1.000000e+00 ; 7E0802F2 EXP 15, 12, 0, 1, 0, v0, v1, v2, v4 ; F80008CF 04020100 S_ENDPGM ; BF810000 FRAG DCL OUT[0], COLOR DCL CONST[1][0..96] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xyz, IMM[0].xxxx 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: V_MOV_B32_e32 v0, 0.000000e+00 ; 7E000280 EXP 15, 0, 0, 1, 1, v0, v0, v0, v0 ; F800180F 00000000 S_ENDPGM ; BF810000 VERT DCL IN[0] DCL OUT[0], POSITION DCL CONST[0..3] DCL CONST[1][0..96] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] UINT32 {0, 176, 160, 144} IMM[1] INT32 {11, 10, 9, 8} IMM[2] UINT32 {128, 0, 0, 0} 0: MUL TEMP[0], CONST[0], IN[0].xxxx 1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0] 2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: UARL ADDR[0].x, IMM[1].xxxx 5: MOV TEMP[1], CONST[1][ADDR[0].x] 6: UARL ADDR[0].x, IMM[1].yyyy 7: MOV TEMP[2], CONST[1][ADDR[0].x] 8: UARL ADDR[0].x, IMM[1].zzzz 9: MOV TEMP[3], CONST[1][ADDR[0].x] 10: UARL ADDR[0].x, IMM[1].wwww 11: MOV TEMP[4], CONST[1][ADDR[0].x] 12: MUL TEMP[4], TEMP[4], TEMP[0].xxxx 13: MAD TEMP[3], TEMP[3], TEMP[0].yyyy, TEMP[4] 14: MAD TEMP[2], TEMP[2], TEMP[0].zzzz, TEMP[3] 15: MAD TEMP[0], TEMP[1], TEMP[0].wwww, TEMP[2] 16: MOV OUT[0], TEMP[0] 17: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, [17 x <16 x i8>] addrspace(2)* inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %10 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %11 = load <16 x i8> addrspace(2)* %10, !tbaa !0 %12 = call float @llvm.SI.load.const(<16 x i8> %11, i32 0) %13 = call float @llvm.SI.load.const(<16 x i8> %11, i32 4) %14 = call float @llvm.SI.load.const(<16 x i8> %11, i32 8) %15 = call float @llvm.SI.load.const(<16 x i8> %11, i32 12) %16 = call float @llvm.SI.load.const(<16 x i8> %11, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %11, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %11, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %11, i32 28) %20 = call float @llvm.SI.load.const(<16 x i8> %11, i32 32) %21 = call float @llvm.SI.load.const(<16 x i8> %11, i32 36) %22 = call float @llvm.SI.load.const(<16 x i8> %11, i32 40) %23 = call float @llvm.SI.load.const(<16 x i8> %11, i32 44) %24 = call float @llvm.SI.load.const(<16 x i8> %11, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %11, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %11, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %11, i32 60) %28 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 1 %29 = load <16 x i8> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [17 x <16 x i8>] addrspace(2)* %4, i64 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %6) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = fmul float %12, %33 %37 = fmul float %13, %33 %38 = fmul float %14, %33 %39 = fmul float %15, %33 %40 = fmul float %16, %34 %41 = fadd float %40, %36 %42 = fmul float %17, %34 %43 = fadd float %42, %37 %44 = fmul float %18, %34 %45 = fadd float %44, %38 %46 = fmul float %19, %34 %47 = fadd float %46, %39 %48 = fmul float %20, %35 %49 = fadd float %48, %41 %50 = fmul float %21, %35 %51 = fadd float %50, %43 %52 = fmul float %22, %35 %53 = fadd float %52, %45 %54 = fmul float %23, %35 %55 = fadd float %54, %47 %56 = fadd float %49, %24 %57 = fadd float %51, %25 %58 = fadd float %53, %26 %59 = fadd float %55, %27 %60 = shl i32 11, 4 %61 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %60) %62 = shl i32 11, 4 %63 = add i32 %62, 4 %64 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %63) %65 = shl i32 11, 4 %66 = add i32 %65, 8 %67 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %66) %68 = shl i32 11, 4 %69 = add i32 %68, 12 %70 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %69) %71 = shl i32 10, 4 %72 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %71) %73 = shl i32 10, 4 %74 = add i32 %73, 4 %75 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %74) %76 = shl i32 10, 4 %77 = add i32 %76, 8 %78 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %77) %79 = shl i32 10, 4 %80 = add i32 %79, 12 %81 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %80) %82 = shl i32 9, 4 %83 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %82) %84 = shl i32 9, 4 %85 = add i32 %84, 4 %86 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %85) %87 = shl i32 9, 4 %88 = add i32 %87, 8 %89 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %88) %90 = shl i32 9, 4 %91 = add i32 %90, 12 %92 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %91) %93 = shl i32 8, 4 %94 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %93) %95 = shl i32 8, 4 %96 = add i32 %95, 4 %97 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %96) %98 = shl i32 8, 4 %99 = add i32 %98, 8 %100 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %99) %101 = shl i32 8, 4 %102 = add i32 %101, 12 %103 = call float @llvm.SI.load.const(<16 x i8> %29, i32 %102) %104 = fmul float %94, %56 %105 = fmul float %97, %56 %106 = fmul float %100, %56 %107 = fmul float %103, %56 %108 = fmul float %83, %57 %109 = fadd float %108, %104 %110 = fmul float %86, %57 %111 = fadd float %110, %105 %112 = fmul float %89, %57 %113 = fadd float %112, %106 %114 = fmul float %92, %57 %115 = fadd float %114, %107 %116 = fmul float %72, %58 %117 = fadd float %116, %109 %118 = fmul float %75, %58 %119 = fadd float %118, %111 %120 = fmul float %78, %58 %121 = fadd float %120, %113 %122 = fmul float %81, %58 %123 = fadd float %122, %115 %124 = fmul float %61, %59 %125 = fadd float %124, %117 %126 = fmul float %64, %59 %127 = fadd float %126, %119 %128 = fmul float %67, %59 %129 = fadd float %128, %121 %130 = fmul float %70, %59 %131 = fadd float %130, %123 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %125, float %127, float %129, float %131) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" } attributes #1 = { nounwind readnone } !0 = metadata !{metadata !"const", null, i32 1} Shader Disassembly: S_LOAD_DWORDX4 s[4:7], s[8:9], 0 ; C0820900 S_WAITCNT lgkmcnt(0) ; BF8C007F BUFFER_LOAD_FORMAT_XYZW v[0:3], s[4:7][v0] + 0 + 0, glc=0, slc=0, tfe=0 ; E00C2000 80010000 S_LOAD_DWORDX4 s[4:7], s[0:1], 0 ; C0820100 S_WAITCNT vmcnt(0) lgkmcnt(0) ; BF8C0070 S_BUFFER_LOAD_DWORD s2, s[4:7], 1 ; C2010501 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v4, s2, v0 ; 10080002 S_BUFFER_LOAD_DWORD s2, s[4:7], 5 ; C2010505 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, s2, v1, v4, 0, 0, 0, 0 ; D2820004 04120202 S_BUFFER_LOAD_DWORD s2, s[4:7], 9 ; C2010509 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, s2, v2, v4, 0, 0, 0, 0 ; D2820004 04120402 S_BUFFER_LOAD_DWORD s2, s[4:7], 13 ; C201050D S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v4, s2, v4 ; 06080802 S_BUFFER_LOAD_DWORD s2, s[4:7], 0 ; C2010500 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s2, v0 ; 100A0002 S_BUFFER_LOAD_DWORD s2, s[4:7], 4 ; C2010504 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, s2, v1, v5, 0, 0, 0, 0 ; D2820005 04160202 S_BUFFER_LOAD_DWORD s2, s[4:7], 8 ; C2010508 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v5, s2, v2, v5, 0, 0, 0, 0 ; D2820005 04160402 S_BUFFER_LOAD_DWORD s2, s[4:7], 12 ; C201050C S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v5, s2, v5 ; 060A0A02 S_LOAD_DWORDX4 s[0:3], s[0:1], 4 ; C0800104 S_WAITCNT lgkmcnt(0) ; BF8C007F S_BUFFER_LOAD_DWORD s8, s[0:3], 35 ; C2040123 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v6, s8, v5 ; 100C0A08 S_BUFFER_LOAD_DWORD s8, s[0:3], 39 ; C2040127 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, s8, v4, v6, 0, 0, 0, 0 ; D2820006 041A0808 S_BUFFER_LOAD_DWORD s8, s[4:7], 2 ; C2040502 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v7, s8, v0 ; 100E0008 S_BUFFER_LOAD_DWORD s8, s[4:7], 6 ; C2040506 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, s8, v1, v7, 0, 0, 0, 0 ; D2820007 041E0208 S_BUFFER_LOAD_DWORD s8, s[4:7], 10 ; C204050A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v7, s8, v2, v7, 0, 0, 0, 0 ; D2820007 041E0408 S_BUFFER_LOAD_DWORD s8, s[4:7], 14 ; C204050E S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v7, s8, v7 ; 060E0E08 S_BUFFER_LOAD_DWORD s8, s[0:3], 43 ; C204012B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v6, s8, v7, v6, 0, 0, 0, 0 ; D2820006 041A0E08 S_BUFFER_LOAD_DWORD s8, s[4:7], 3 ; C2040503 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v8, s8, v0 ; 10100008 S_BUFFER_LOAD_DWORD s8, s[4:7], 7 ; C2040507 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v8, s8, v1, v8, 0, 0, 0, 0 ; D2820008 04220208 S_BUFFER_LOAD_DWORD s8, s[4:7], 11 ; C204050B S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, s8, v2, v8, 0, 0, 0, 0 ; D2820000 04220408 S_BUFFER_LOAD_DWORD s4, s[4:7], 15 ; C202050F S_WAITCNT lgkmcnt(0) ; BF8C007F V_ADD_F32_e32 v0, s4, v0 ; 06000004 S_BUFFER_LOAD_DWORD s4, s[0:3], 47 ; C202012F S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v1, s4, v0, v6, 0, 0, 0, 0 ; D2820001 041A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 34 ; C2020122 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v2, s4, v5 ; 10040A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 38 ; C2020126 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, s4, v4, v2, 0, 0, 0, 0 ; D2820002 040A0804 S_BUFFER_LOAD_DWORD s4, s[0:3], 42 ; C202012A S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, s4, v7, v2, 0, 0, 0, 0 ; D2820002 040A0E04 S_BUFFER_LOAD_DWORD s4, s[0:3], 46 ; C202012E S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v2, s4, v0, v2, 0, 0, 0, 0 ; D2820002 040A0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 33 ; C2020121 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v3, s4, v5 ; 10060A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 37 ; C2020125 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s4, v4, v3, 0, 0, 0, 0 ; D2820003 040E0804 S_BUFFER_LOAD_DWORD s4, s[0:3], 41 ; C2020129 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s4, v7, v3, 0, 0, 0, 0 ; D2820003 040E0E04 S_BUFFER_LOAD_DWORD s4, s[0:3], 45 ; C202012D S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v3, s4, v0, v3, 0, 0, 0, 0 ; D2820003 040E0004 S_BUFFER_LOAD_DWORD s4, s[0:3], 32 ; C2020120 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MUL_F32_e32 v5, s4, v5 ; 100A0A04 S_BUFFER_LOAD_DWORD s4, s[0:3], 36 ; C2020124 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, s4, v4, v5, 0, 0, 0, 0 ; D2820004 04160804 S_BUFFER_LOAD_DWORD s4, s[0:3], 40 ; C2020128 S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v4, s4, v7, v4, 0, 0, 0, 0 ; D2820004 04120E04 S_BUFFER_LOAD_DWORD s0, s[0:3], 44 ; C200012C S_WAITCNT lgkmcnt(0) ; BF8C007F V_MAD_F32 v0, s0, v0, v4, 0, 0, 0, 0 ; D2820000 04120000 EXP 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 S_ENDPGM ; BF810000 FRAG DCL OUT[0], COLOR DCL CONST[1][0..96] DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xyz, IMM[0].xxxx 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } Shader Disassembly: V_CVT_PKRTZ_F16_F32_e64 v0, 0.000000e+00, 0.000000e+00, 0, 0, 0, 0 ; D25E0000 02010080 EXP 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 S_ENDPGM ; BF810000 FRAG DCL IN[0], GENERIC[20], PERSPECTIVE DCL IN[1], GENERIC[21], PERSPECTIVE DCL IN[2], GENERIC[22], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[2..73] DCL CONST[80..93] DCL CONST[1][0..96] DCL CONST[2][0..68] DCL TEMP[0..42], LOCAL DCL ADDR[0] IMM[0] UINT32 {0, 224, 208, 192} IMM[1] INT32 {14, 13, 12, 15} IMM[2] UINT32 {240, 320, 1, 256} IMM[3] INT32 {20, 16, 6, 5} IMM[4] FLT32 { 0.0000, 1.0000, 0.6931, 0.0010} IMM[5] FLT32 { 2.0000, -1.0039, 0.5000, 3.0000} IMM[6] FLT32 { 1.0500, 0.8000, 0.7213, -1.0000} IMM[7] UINT32 {96, 80, 64, 112} IMM[8] INT32 {4, 7, 10, 9} IMM[9] UINT32 {252, 264, 160, 144} IMM[10] UINT32 {128, 176, 260, 0} IMM[11] INT32 {8, 11, 0, 1} IMM[12] FLT32 { -0.8000, 5.0000, 0.0000, 0.0000} IMM[13] INT32 {2, 3, 0, 0} 0: MOV TEMP[0].xy, IN[1].zwzz 1: MOV TEMP[0].z, IN[2].xxxx 2: UARL ADDR[0].x, IMM[1].xxxx 3: MOV TEMP[1], CONST[1][ADDR[0].x] 4: UARL ADDR[0].x, IMM[1].yyyy 5: MOV TEMP[2], CONST[1][ADDR[0].x] 6: UARL ADDR[0].x, IMM[1].zzzz 7: MOV TEMP[3], CONST[1][ADDR[0].x] 8: MUL TEMP[3], TEMP[3], IN[0].wwww 9: MAD TEMP[2], TEMP[2], IN[1].xxxx, TEMP[3] 10: MAD TEMP[1], TEMP[1], IN[1].yyyy, TEMP[2] 11: UARL ADDR[0].x, IMM[1].wwww 12: MOV TEMP[2], CONST[1][ADDR[0].x] 13: ADD TEMP[1], TEMP[1], TEMP[2] 14: UARL ADDR[0].x, IMM[3].xxxx 15: MOV TEMP[2].xyz, CONST[1][ADDR[0].x].xyzz 16: ADD TEMP[1].xyz, TEMP[1].xyzz, -TEMP[2].xyzz 17: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 18: RSQ TEMP[2].x, TEMP[1].xxxx 19: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx 20: CMP TEMP[2].x, -TEMP[1].xxxx, TEMP[2].xxxx, IMM[4].xxxx 21: RCP TEMP[1].x, CONST[85].xxxx 22: MAD TEMP[1].x, TEMP[2].xxxx, TEMP[1].xxxx, IMM[4].yyyy 23: LG2 TEMP[1].x, TEMP[1].xxxx 24: MUL TEMP[1].x, TEMP[1].xxxx, IMM[4].zzzz 25: LG2 TEMP[2].x, CONST[86].xxxx 26: MUL TEMP[2].x, TEMP[2].xxxx, IMM[4].zzzz 27: RCP TEMP[2].x, TEMP[2].xxxx 28: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx 29: FLR TEMP[2].x, TEMP[1].xxxx 30: EX2 TEMP[2].x, TEMP[2].xxxx 31: FRC TEMP[1].x, TEMP[1].xxxx 32: ABS TEMP[3].xyz, IN[2].yzww 33: ADD TEMP[4].xyz, TEMP[3].xyzz, IMM[4].wwww 34: POW TEMP[5].x, TEMP[4].xxxx, CONST[84].xxxx 35: POW TEMP[5].y, TEMP[4].yyyy, CONST[84].xxxx 36: POW TEMP[5].z, TEMP[4].zzzz, CONST[84].xxxx 37: ADD TEMP[4].x, TEMP[5].xxxx, TEMP[5].yyyy 38: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].zzzz 39: RCP TEMP[4].x, TEMP[4].xxxx 40: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xxxx 41: MUL TEMP[5].x, CONST[83].xxxx, TEMP[2].xxxx 42: RCP TEMP[5].x, TEMP[5].xxxx 43: MUL TEMP[5].xy, TEMP[0].zyyy, TEMP[5].xxxx 44: MOV TEMP[5].xy, TEMP[5].xyyy 45: TEX TEMP[5], TEMP[5], SAMP[4], 2D 46: MUL TEMP[6].x, CONST[82].xxxx, TEMP[2].xxxx 47: RCP TEMP[6].x, TEMP[6].xxxx 48: MUL TEMP[6].xy, TEMP[0].xzzz, TEMP[6].xxxx 49: MOV TEMP[6].xy, TEMP[6].xyyy 50: TEX TEMP[6], TEMP[6], SAMP[3], 2D 51: MUL TEMP[7].x, CONST[81].xxxx, TEMP[2].xxxx 52: RCP TEMP[7].x, TEMP[7].xxxx 53: MUL TEMP[7].xy, IN[1].zwww, TEMP[7].xxxx 54: MOV TEMP[7].xy, TEMP[7].xyyy 55: TEX TEMP[7], TEMP[7], SAMP[2], 2D 56: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[4].zzzz 57: MAD TEMP[6].xyz, TEMP[6].xyzz, TEMP[4].yyyy, TEMP[7].xyzz 58: MAD TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xxxx, TEMP[6].xyzz 59: MUL TEMP[5].x, CONST[81].xxxx, TEMP[2].xxxx 60: RCP TEMP[5].x, TEMP[5].xxxx 61: MUL TEMP[5].xy, IN[1].zwww, TEMP[5].xxxx 62: MOV TEMP[5].xy, TEMP[5].xyyy 63: TEX TEMP[5], TEMP[5], SAMP[5], 2D 64: MAD TEMP[6].xy, TEMP[5].xyyy, IMM[5].xxxx, IMM[5].yyyy 65: MOV TEMP[6].z, TEMP[5].zzzz 66: MUL TEMP[5].x, CONST[82].xxxx, TEMP[2].xxxx 67: RCP TEMP[5].x, TEMP[5].xxxx 68: MUL TEMP[5].xy, TEMP[0].xzzz, TEMP[5].xxxx 69: MOV TEMP[5].xy, TEMP[5].xyyy 70: TEX TEMP[5], TEMP[5], SAMP[6], 2D 71: MAD TEMP[7].xy, TEMP[5].xyyy, IMM[5].xxxx, IMM[5].yyyy 72: MOV TEMP[7].z, TEMP[5].zzzz 73: MUL TEMP[5].x, CONST[83].xxxx, TEMP[2].xxxx 74: RCP TEMP[5].x, TEMP[5].xxxx 75: MUL TEMP[5].xy, TEMP[0].zyyy, TEMP[5].xxxx 76: MOV TEMP[5].xy, TEMP[5].xyyy 77: TEX TEMP[5], TEMP[5], SAMP[7], 2D 78: MAD TEMP[8].xy, TEMP[5].xyyy, IMM[5].xxxx, IMM[5].yyyy 79: MOV TEMP[8].z, TEMP[5].zzzz 80: ADD TEMP[5].x, TEMP[3].xxxx, TEMP[3].yyyy 81: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[3].zzzz 82: RCP TEMP[5].x, TEMP[5].xxxx 83: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx 84: SSG TEMP[5].x, IN[2].yyyy 85: MUL TEMP[5].xyz, TEMP[8].zyxx, TEMP[5].xxxx 86: SSG TEMP[8].x, IN[2].zzzz 87: MUL TEMP[7].xyz, TEMP[7].xzyy, TEMP[8].xxxx 88: SSG TEMP[8].x, IN[2].wwww 89: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[8].xxxx 90: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[3].zzzz 91: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[3].yyyy, TEMP[6].xyzz 92: MAD TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx, TEMP[6].xyzz 93: MOV TEMP[5].xyz, TEMP[4].xyzx 94: MOV TEMP[6].xyz, TEMP[3].xyzx 95: FSLT TEMP[7].x, TEMP[1].xxxx, CONST[87].xxxx 96: UIF TEMP[7].xxxx :0 97: MUL TEMP[2].x, TEMP[2].xxxx, IMM[5].zzzz 98: ABS TEMP[7].xyz, IN[2].yzww 99: ADD TEMP[8].xyz, TEMP[7].xyzz, IMM[4].wwww 100: POW TEMP[9].x, TEMP[8].xxxx, CONST[84].xxxx 101: POW TEMP[9].y, TEMP[8].yyyy, CONST[84].xxxx 102: POW TEMP[9].z, TEMP[8].zzzz, CONST[84].xxxx 103: ADD TEMP[8].x, TEMP[9].xxxx, TEMP[9].yyyy 104: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[9].zzzz 105: RCP TEMP[8].x, TEMP[8].xxxx 106: MUL TEMP[8].xyz, TEMP[9].xyzz, TEMP[8].xxxx 107: MUL TEMP[9].x, CONST[81].xxxx, TEMP[2].xxxx 108: RCP TEMP[9].x, TEMP[9].xxxx 109: MUL TEMP[9].xy, IN[1].zwww, TEMP[9].xxxx 110: MOV TEMP[9].xy, TEMP[9].xyyy 111: TEX TEMP[9], TEMP[9], SAMP[5], 2D 112: MAD TEMP[10].xy, TEMP[9].xyyy, IMM[5].xxxx, IMM[5].yyyy 113: MOV TEMP[10].z, TEMP[9].zzzz 114: MUL TEMP[9].x, CONST[82].xxxx, TEMP[2].xxxx 115: RCP TEMP[9].x, TEMP[9].xxxx 116: MUL TEMP[9].xy, TEMP[0].xzzz, TEMP[9].xxxx 117: MOV TEMP[9].xy, TEMP[9].xyyy 118: TEX TEMP[9], TEMP[9], SAMP[6], 2D 119: MAD TEMP[11].xy, TEMP[9].xyyy, IMM[5].xxxx, IMM[5].yyyy 120: MOV TEMP[11].z, TEMP[9].zzzz 121: MUL TEMP[9].x, CONST[83].xxxx, TEMP[2].xxxx 122: RCP TEMP[9].x, TEMP[9].xxxx 123: MUL TEMP[9].xy, TEMP[0].zyyy, TEMP[9].xxxx 124: MOV TEMP[9].xy, TEMP[9].xyyy 125: TEX TEMP[9], TEMP[9], SAMP[7], 2D 126: MAD TEMP[12].xy, TEMP[9].xyyy, IMM[5].xxxx, IMM[5].yyyy 127: MOV TEMP[12].z, TEMP[9].zzzz 128: ADD TEMP[9].x, TEMP[7].xxxx, TEMP[7].yyyy 129: ADD TEMP[9].x, TEMP[9].xxxx, TEMP[7].zzzz 130: RCP TEMP[9].x, TEMP[9].xxxx 131: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[9].xxxx 132: RCP TEMP[9].x, CONST[87].xxxx 133: MUL_SAT TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 134: MUL TEMP[9].x, IMM[5].xxxx, TEMP[1].xxxx 135: ADD TEMP[9].x, IMM[5].wwww, -TEMP[9].xxxx 136: MUL TEMP[9].x, TEMP[1].xxxx, TEMP[9].xxxx 137: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx 138: MUL TEMP[9].x, CONST[83].xxxx, TEMP[2].xxxx 139: RCP TEMP[9].x, TEMP[9].xxxx 140: MUL TEMP[9].xy, TEMP[0].zyyy, TEMP[9].xxxx 141: MOV TEMP[9].xy, TEMP[9].xyyy 142: TEX TEMP[9], TEMP[9], SAMP[4], 2D 143: MUL TEMP[13].x, CONST[82].xxxx, TEMP[2].xxxx 144: RCP TEMP[13].x, TEMP[13].xxxx 145: MUL TEMP[0].xy, TEMP[0].xzzz, TEMP[13].xxxx 146: MOV TEMP[0].xy, TEMP[0].xyyy 147: TEX TEMP[0], TEMP[0], SAMP[3], 2D 148: MUL TEMP[2].x, CONST[81].xxxx, TEMP[2].xxxx 149: RCP TEMP[2].x, TEMP[2].xxxx 150: MUL TEMP[2].xy, IN[1].zwww, TEMP[2].xxxx 151: MOV TEMP[2].xy, TEMP[2].xyyy 152: TEX TEMP[2], TEMP[2], SAMP[2], 2D 153: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].zzzz 154: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[8].yyyy, TEMP[2].xyzz 155: MAD TEMP[0].xyz, TEMP[9].xyzz, TEMP[8].xxxx, TEMP[0].xyzz 156: LRP TEMP[5].xyz, TEMP[1].xxxx, TEMP[4].xyzz, TEMP[0].xyzz 157: SSG TEMP[0].x, IN[2].yyyy 158: MUL TEMP[0].xyz, TEMP[12].zyxx, TEMP[0].xxxx 159: SSG TEMP[2].x, IN[2].zzzz 160: MUL TEMP[2].xyz, TEMP[11].xzyy, TEMP[2].xxxx 161: SSG TEMP[4].x, IN[2].wwww 162: MUL TEMP[4].xyz, TEMP[10].xyzz, TEMP[4].xxxx 163: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[7].zzzz 164: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].yyyy, TEMP[4].xyzz 165: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[7].xxxx, TEMP[2].xyzz 166: LRP TEMP[6].xyz, TEMP[1].xxxx, TEMP[3].xyzz, TEMP[0].xyzz 167: ENDIF 168: FSLT TEMP[0].x, IMM[5].zzzz, CONST[88].xxxx 169: UIF TEMP[0].xxxx :0 170: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[0].xyzz 171: ENDIF 172: MUL TEMP[0].xyz, CONST[90].xyzz, TEMP[6].xxxx 173: MAD TEMP[0].xyz, CONST[91].xyzz, TEMP[6].yyyy, TEMP[0].xyzz 174: MAD TEMP[0].xyz, CONST[92].xyzz, TEMP[6].zzzz, TEMP[0].xyzz 175: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz 176: RSQ TEMP[1].x, TEMP[1].xxxx 177: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx 178: MOV TEMP[1].xyz, TEMP[0].xyzx 179: MOV TEMP[2].xyz, TEMP[5].xyzx 180: MOV TEMP[3], CONST[80] 181: UARL ADDR[0].x, IMM[1].xxxx 182: MOV TEMP[4], CONST[1][ADDR[0].x] 183: UARL ADDR[0].x, IMM[1].yyyy 184: MOV TEMP[6], CONST[1][ADDR[0].x] 185: UARL ADDR[0].x, IMM[1].zzzz 186: MOV TEMP[7], CONST[1][ADDR[0].x] 187: MUL TEMP[7], TEMP[7], IN[0].wwww 188: MAD TEMP[6], TEMP[6], IN[1].xxxx, TEMP[7] 189: MAD TEMP[4], TEMP[4], IN[1].yyyy, TEMP[6] 190: UARL ADDR[0].x, IMM[1].wwww 191: MOV TEMP[6], CONST[1][ADDR[0].x] 192: ADD TEMP[4], TEMP[4], TEMP[6] 193: MOV TEMP[6].xyz, TEMP[4].xyzx 194: UARL ADDR[0].x, IMM[3].xxxx 195: MOV TEMP[7].xyz, CONST[1][ADDR[0].x].xyzz 196: ADD TEMP[7].xyz, TEMP[4].xyzz, -TEMP[7].xyzz 197: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz 198: RSQ TEMP[8].x, TEMP[8].xxxx 199: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[8].xxxx 200: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[7].xyzz 201: MUL TEMP[8].xyz, TEMP[8].xxxx, TEMP[0].xyzz 202: MUL TEMP[8].xyz, IMM[5].xxxx, TEMP[8].xyzz 203: ADD TEMP[7].xyz, TEMP[7].xyzz, -TEMP[8].xyzz 204: MOV TEMP[8].xyz, TEMP[7].xyzx 205: ADD_SAT TEMP[9].x, TEMP[0].yyyy, IMM[6].xxxx 206: UARL ADDR[0].x, IMM[1].xxxx 207: MOV TEMP[10].xyz, CONST[2][ADDR[0].x].xyzz 208: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[10].xyzz 209: MUL TEMP[9].xyz, TEMP[5].xyzz, TEMP[9].xyzz 210: UARL ADDR[0].x, IMM[1].zzzz 211: UARL ADDR[0].x, IMM[1].zzzz 212: MOV TEMP[10].xyz, CONST[2][ADDR[0].x].xyzz 213: UARL ADDR[0].x, IMM[1].yyyy 214: UARL ADDR[0].x, IMM[1].yyyy 215: MOV TEMP[11].xyz, CONST[2][ADDR[0].x].xyzz 216: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[10].xyzz 217: MAX TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx 218: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[11].xyzz 219: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xyzz 220: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[10].xyzz 221: MAX TEMP[5].x, IMM[4].wwww, TEMP[5].xxxx 222: POW TEMP[5].x, TEMP[5].xxxx, CONST[80].wwww 223: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[11].xyzz 224: MAD TEMP[0].xyz, TEMP[5].xyzz, CONST[80].xyzz, TEMP[0].xyzz 225: UARL ADDR[0].x, IMM[1].wwww 226: MOV TEMP[5].xyz, CONST[2][ADDR[0].x].xyzz 227: ADD TEMP[5].xyz, TEMP[4].xyzz, -TEMP[5].xyzz 228: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[5].xyzz 229: RSQ TEMP[7].x, TEMP[5].xxxx 230: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[5].xxxx 231: CMP TEMP[5].x, -TEMP[5].xxxx, TEMP[7].xxxx, IMM[4].xxxx 232: MOV TEMP[7].x, IMM[4].xxxx 233: UARL ADDR[0].x, IMM[3].yyyy 234: UARL ADDR[0].x, IMM[3].yyyy 235: MOV TEMP[10].x, CONST[2][ADDR[0].x].xxxx 236: MUL TEMP[10].x, IMM[6].yyyy, TEMP[10].xxxx 237: FSLT TEMP[10].x, TEMP[5].xxxx, TEMP[10].xxxx 238: UIF TEMP[10].xxxx :0 239: UARL ADDR[0].x, IMM[3].zzzz 240: MOV TEMP[10], CONST[2][ADDR[0].x] 241: UARL ADDR[0].x, IMM[3].wwww 242: MOV TEMP[11], CONST[2][ADDR[0].x] 243: UARL ADDR[0].x, IMM[8].xxxx 244: MOV TEMP[12], CONST[2][ADDR[0].x] 245: MUL TEMP[12], TEMP[12], TEMP[4].xxxx 246: MAD TEMP[11], TEMP[11], TEMP[4].yyyy, TEMP[12] 247: MAD TEMP[10], TEMP[10], TEMP[4].zzzz, TEMP[11] 248: UARL ADDR[0].x, IMM[8].yyyy 249: MOV TEMP[11], CONST[2][ADDR[0].x] 250: ADD TEMP[10], TEMP[10], TEMP[11] 251: UARL ADDR[0].x, IMM[1].wwww 252: MOV TEMP[11].x, CONST[2][ADDR[0].x].wwww 253: MAX TEMP[12].x, TEMP[10].zzzz, IMM[6].wwww 254: MIN TEMP[12].x, TEMP[12].xxxx, IMM[4].yyyy 255: ADD TEMP[12].x, TEMP[12].xxxx, IMM[4].yyyy 256: MUL TEMP[11].x, -TEMP[11].xxxx, TEMP[12].xxxx 257: MUL TEMP[11].x, IMM[6].zzzz, TEMP[11].xxxx 258: EX2 TEMP[11].x, TEMP[11].xxxx 259: MAD TEMP[10].xy, TEMP[10].xyyy, IMM[5].zzzz, IMM[5].zzzz 260: UARL ADDR[0].x, IMM[3].yyyy 261: MOV TEMP[12].x, CONST[2][ADDR[0].x].zzzz 262: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx 263: MOV TEMP[10].xy, TEMP[10].xyyy 264: TEX TEMP[10], TEMP[10], SAMP[0], RECT 265: MUL_SAT TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx 266: MOV TEMP[7].x, TEMP[10].xxxx 267: ELSE :0 268: UARL ADDR[0].x, IMM[3].yyyy 269: UARL ADDR[0].x, IMM[3].yyyy 270: MOV TEMP[10].x, CONST[2][ADDR[0].x].xxxx 271: FSLT TEMP[10].x, TEMP[10].xxxx, TEMP[5].xxxx 272: UIF TEMP[10].xxxx :0 273: UARL ADDR[0].x, IMM[8].zzzz 274: MOV TEMP[10], CONST[2][ADDR[0].x] 275: UARL ADDR[0].x, IMM[8].wwww 276: MOV TEMP[11], CONST[2][ADDR[0].x] 277: UARL ADDR[0].x, IMM[11].xxxx 278: MOV TEMP[12], CONST[2][ADDR[0].x] 279: MUL TEMP[12], TEMP[12], TEMP[4].xxxx 280: MAD TEMP[11], TEMP[11], TEMP[4].yyyy, TEMP[12] 281: MAD TEMP[10], TEMP[10], TEMP[4].zzzz, TEMP[11] 282: UARL ADDR[0].x, IMM[11].yyyy 283: MOV TEMP[11], CONST[2][ADDR[0].x] 284: ADD TEMP[10], TEMP[10], TEMP[11] 285: UARL ADDR[0].x, IMM[1].wwww 286: MOV TEMP[11].xyz, CONST[2][ADDR[0].x].xyzz 287: ADD TEMP[11].xyz, TEMP[4].xyzz, -TEMP[11].xyzz 288: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[11].xyzz 289: RSQ TEMP[12].x, TEMP[11].xxxx 290: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx 291: CMP TEMP[12].x, -TEMP[11].xxxx, TEMP[12].xxxx, IMM[4].xxxx 292: UARL ADDR[0].x, IMM[3].yyyy 293: MOV TEMP[11].x, CONST[2][ADDR[0].x].yyyy 294: RCP TEMP[11].x, TEMP[11].xxxx 295: MAD TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx, IMM[12].xxxx 296: MUL_SAT TEMP[11].x, TEMP[11].xxxx, IMM[12].yyyy 297: UARL ADDR[0].x, IMM[1].wwww 298: MOV TEMP[12].x, CONST[2][ADDR[0].x].wwww 299: MAX TEMP[13].x, TEMP[10].zzzz, IMM[6].wwww 300: MIN TEMP[13].x, TEMP[13].xxxx, IMM[4].yyyy 301: ADD TEMP[13].x, TEMP[13].xxxx, IMM[4].yyyy 302: MUL TEMP[12].x, -TEMP[12].xxxx, TEMP[13].xxxx 303: MUL TEMP[12].x, IMM[6].zzzz, TEMP[12].xxxx 304: EX2 TEMP[12].x, TEMP[12].xxxx 305: MAD TEMP[10].xy, TEMP[10].xyyy, IMM[5].zzzz, IMM[5].zzzz 306: UARL ADDR[0].x, IMM[3].yyyy 307: MOV TEMP[13].x, CONST[2][ADDR[0].x].zzzz 308: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx 309: MOV TEMP[10].xy, TEMP[10].xyyy 310: TEX TEMP[10], TEMP[10], SAMP[1], RECT 311: MUL_SAT TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx 312: MUL TEMP[12].x, IMM[5].xxxx, TEMP[11].xxxx 313: ADD TEMP[12].x, IMM[5].wwww, -TEMP[12].xxxx 314: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[12].xxxx 315: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx 316: LRP TEMP[7].x, TEMP[11].xxxx, IMM[4].yyyy, TEMP[10].xxxx 317: ELSE :0 318: UARL ADDR[0].x, IMM[3].zzzz 319: MOV TEMP[10], CONST[2][ADDR[0].x] 320: UARL ADDR[0].x, IMM[3].wwww 321: MOV TEMP[11], CONST[2][ADDR[0].x] 322: UARL ADDR[0].x, IMM[8].xxxx 323: MOV TEMP[12], CONST[2][ADDR[0].x] 324: MUL TEMP[12], TEMP[12], TEMP[4].xxxx 325: MAD TEMP[11], TEMP[11], TEMP[4].yyyy, TEMP[12] 326: MAD TEMP[10], TEMP[10], TEMP[4].zzzz, TEMP[11] 327: UARL ADDR[0].x, IMM[8].yyyy 328: MOV TEMP[11], CONST[2][ADDR[0].x] 329: ADD TEMP[10], TEMP[10], TEMP[11] 330: UARL ADDR[0].x, IMM[8].zzzz 331: MOV TEMP[11], CONST[2][ADDR[0].x] 332: UARL ADDR[0].x, IMM[8].wwww 333: MOV TEMP[12], CONST[2][ADDR[0].x] 334: UARL ADDR[0].x, IMM[11].xxxx 335: MOV TEMP[13], CONST[2][ADDR[0].x] 336: MUL TEMP[13], TEMP[13], TEMP[4].xxxx 337: MAD TEMP[12], TEMP[12], TEMP[4].yyyy, TEMP[13] 338: MAD TEMP[4], TEMP[11], TEMP[4].zzzz, TEMP[12] 339: UARL ADDR[0].x, IMM[11].yyyy 340: MOV TEMP[11], CONST[2][ADDR[0].x] 341: ADD TEMP[4], TEMP[4], TEMP[11] 342: UARL ADDR[0].x, IMM[3].yyyy 343: UARL ADDR[0].x, IMM[3].yyyy 344: MOV TEMP[11].x, CONST[2][ADDR[0].x].xxxx 345: MUL TEMP[12].x, IMM[6].yyyy, TEMP[11].xxxx 346: ADD TEMP[5].x, TEMP[5].xxxx, -TEMP[12].xxxx 347: ADD TEMP[11].x, TEMP[11].xxxx, -TEMP[12].xxxx 348: RCP TEMP[11].x, TEMP[11].xxxx 349: MUL_SAT TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx 350: UARL ADDR[0].x, IMM[1].wwww 351: UARL ADDR[0].x, IMM[1].wwww 352: MOV TEMP[11].x, CONST[2][ADDR[0].x].wwww 353: UARL ADDR[0].x, IMM[3].yyyy 354: UARL ADDR[0].x, IMM[3].yyyy 355: MOV TEMP[12].x, CONST[2][ADDR[0].x].zzzz 356: MAX TEMP[13].x, TEMP[10].zzzz, IMM[6].wwww 357: MIN TEMP[13].x, TEMP[13].xxxx, IMM[4].yyyy 358: ADD TEMP[13].x, TEMP[13].xxxx, IMM[4].yyyy 359: MUL TEMP[13].x, -TEMP[11].xxxx, TEMP[13].xxxx 360: MUL TEMP[13].x, IMM[6].zzzz, TEMP[13].xxxx 361: EX2 TEMP[13].x, TEMP[13].xxxx 362: MAD TEMP[10].xy, TEMP[10].xyyy, IMM[5].zzzz, IMM[5].zzzz 363: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx 364: MOV TEMP[10].xy, TEMP[10].xyyy 365: TEX TEMP[10], TEMP[10], SAMP[0], RECT 366: MUL_SAT TEMP[10].x, TEMP[13].xxxx, TEMP[10].xxxx 367: MAX TEMP[13].x, TEMP[4].zzzz, IMM[6].wwww 368: MIN TEMP[13].x, TEMP[13].xxxx, IMM[4].yyyy 369: ADD TEMP[13].x, TEMP[13].xxxx, IMM[4].yyyy 370: MUL TEMP[11].x, -TEMP[11].xxxx, TEMP[13].xxxx 371: MUL TEMP[11].x, IMM[6].zzzz, TEMP[11].xxxx 372: EX2 TEMP[11].x, TEMP[11].xxxx 373: MAD TEMP[4].xy, TEMP[4].xyyy, IMM[5].zzzz, IMM[5].zzzz 374: MUL TEMP[4].xy, TEMP[4].xyyy, TEMP[12].xxxx 375: MOV TEMP[4].xy, TEMP[4].xyyy 376: TEX TEMP[4], TEMP[4], SAMP[1], RECT 377: MUL_SAT TEMP[4].x, TEMP[11].xxxx, TEMP[4].xxxx 378: MUL TEMP[11].x, IMM[5].xxxx, TEMP[5].xxxx 379: ADD TEMP[11].x, IMM[5].wwww, -TEMP[11].xxxx 380: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[11].xxxx 381: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx 382: LRP TEMP[7].x, TEMP[5].xxxx, TEMP[4].xxxx, TEMP[10].xxxx 383: ENDIF 384: ENDIF 385: ADD TEMP[4].x, TEMP[7].xxxx, IMM[12].xxxx 386: MUL_SAT TEMP[4].x, TEMP[4].xxxx, IMM[12].yyyy 387: MUL TEMP[5].x, IMM[5].xxxx, TEMP[4].xxxx 388: ADD TEMP[5].x, IMM[5].wwww, -TEMP[5].xxxx 389: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[5].xxxx 390: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 391: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx 392: ADD TEMP[9].xyz, TEMP[9].xyzz, TEMP[0].xyzz 393: MOV TEMP[0].x, IMM[11].zzzz 394: BGNLOOP :0 395: ISGE TEMP[4].x, TEMP[0].xxxx, CONST[3].xxxx 396: UIF TEMP[4].xxxx :0 397: BRK 398: ENDIF 399: UMUL TEMP[5].x, TEMP[0].xxxx, IMM[8].yyyy 400: UARL ADDR[0].x, TEMP[5].xxxx 401: MOV TEMP[7].x, CONST[ADDR[0].x+4].xxxx 402: UADD TEMP[10].x, TEMP[5].xxxx, IMM[11].wwww 403: UARL ADDR[0].x, TEMP[10].xxxx 404: MOV TEMP[7].y, CONST[ADDR[0].x+4].xxxx 405: UADD TEMP[11].x, TEMP[5].xxxx, IMM[13].xxxx 406: UARL ADDR[0].x, TEMP[11].xxxx 407: MOV TEMP[7].z, CONST[ADDR[0].x+4].xxxx 408: UADD TEMP[12].x, TEMP[5].xxxx, IMM[13].yyyy 409: UARL ADDR[0].x, TEMP[12].xxxx 410: MOV TEMP[13].x, CONST[ADDR[0].x+4].xxxx 411: UADD TEMP[14].x, TEMP[5].xxxx, IMM[8].xxxx 412: UARL ADDR[0].x, TEMP[14].xxxx 413: MOV TEMP[13].y, CONST[ADDR[0].x+4].xxxx 414: UADD TEMP[15].x, TEMP[5].xxxx, IMM[3].wwww 415: UARL ADDR[0].x, TEMP[15].xxxx 416: MOV TEMP[13].z, CONST[ADDR[0].x+4].xxxx 417: UADD TEMP[16].x, TEMP[5].xxxx, IMM[3].zzzz 418: UARL ADDR[0].x, TEMP[16].xxxx 419: MOV TEMP[17].x, CONST[ADDR[0].x+4].xxxx 420: ADD TEMP[18].xyz, TEMP[7].xyzz, -TEMP[6].xyzz 421: DP3 TEMP[19].x, TEMP[18].xyzz, TEMP[18].xyzz 422: RSQ TEMP[20].x, TEMP[19].xxxx 423: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx 424: CMP TEMP[21].x, -TEMP[19].xxxx, TEMP[20].xxxx, IMM[4].xxxx 425: RCP TEMP[22].x, TEMP[21].xxxx 426: MUL TEMP[23].xyz, TEMP[18].xyzz, TEMP[22].xxxx 427: DP3 TEMP[24].x, TEMP[1].xyzz, TEMP[23].xyzz 428: MAX TEMP[25].x, IMM[4].xxxx, TEMP[24].xxxx 429: MUL TEMP[26].xyz, TEMP[25].xxxx, TEMP[13].xyzz 430: MUL TEMP[27].xyz, TEMP[26].xyzz, TEMP[2].xyzz 431: DP3 TEMP[28].x, TEMP[8].xyzz, TEMP[23].xyzz 432: MAX TEMP[29].x, IMM[4].wwww, TEMP[28].xxxx 433: POW TEMP[30].x, TEMP[29].xxxx, TEMP[3].wwww 434: MUL TEMP[31].xyz, TEMP[30].xxxx, TEMP[13].xyzz 435: MAD TEMP[27].xyz, TEMP[31].xyzz, TEMP[3].xyzz, TEMP[27].xyzz 436: MUL TEMP[32].x, TEMP[17].xxxx, IMM[5].zzzz 437: ADD TEMP[33].x, TEMP[21].xxxx, -TEMP[32].xxxx 438: ADD TEMP[34].x, TEMP[17].xxxx, -TEMP[32].xxxx 439: RCP TEMP[35].x, TEMP[34].xxxx 440: MUL_SAT TEMP[36].x, TEMP[33].xxxx, TEMP[35].xxxx 441: MUL TEMP[37].x, IMM[5].xxxx, TEMP[36].xxxx 442: ADD TEMP[38].x, IMM[5].wwww, -TEMP[37].xxxx 443: MUL TEMP[39].x, TEMP[36].xxxx, TEMP[38].xxxx 444: MUL TEMP[40].x, TEMP[36].xxxx, TEMP[39].xxxx 445: ADD TEMP[41].x, IMM[4].yyyy, -TEMP[40].xxxx 446: MUL TEMP[42].xyz, TEMP[27].xyzz, TEMP[41].xxxx 447: MOV TEMP[27].xyz, TEMP[42].xyzx 448: ADD TEMP[9].xyz, TEMP[9].xyzz, TEMP[42].xyzz 449: UADD TEMP[0].x, TEMP[0].xxxx, IMM[11].wwww 450: ENDLOOP :0 451: MOV TEMP[0].xyz, TEMP[9].xyzx 452: MOV OUT[0], TEMP[0] 453: END ; ModuleID = 'tgsi' define void @main([17 x <16 x i8>] addrspace(2)* byval, [32 x <16 x i8>] addrspace(2)* byval, [16 x <32 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* inreg, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1280) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1284) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1288) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1292) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1296) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1312) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1328) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1344) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1360) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1376) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1392) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1408) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1440) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1444) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1448) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1456) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1460) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1464) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1472) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1476) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 1480) %46 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <16 x i8>] addrspace(2)* %0, i64 0, i32 2 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 0 %51 = load <32 x i8> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 0 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 1 %55 = load <32 x i8> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 1 %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 2 %59 = load <32 x i8> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 2 %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 3 %63 = load <32 x i8> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 3 %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 4 %67 = load <32 x i8> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 4 %69 = load <16 x i8> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 5 %71 = load <32 x i8> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 5 %73 = load <16 x i8> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 6 %75 = load <32 x i8> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 6 %77 = load <16 x i8> addrspace(2)* %76, !tbaa !0 %78 = getelementptr [16 x <32 x i8>] addrspace(2)* %2, i64 0, i32 7 %79 = load <32 x i8> addrspace(2)* %78, !tbaa !0 %80 = getelementptr [32 x <16 x i8>] addrspace(2)* %1, i64 0, i32 7 %81 = load <16 x i8> addrspace(2)* %80, !tbaa !0 %82 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %94 = shl i32 14, 4 %95 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %94) %96 = shl i32 14, 4 %97 = add i32 %96, 4 %98 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %97) %99 = shl i32 14, 4 %100 = add i32 %99, 8 %101 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %100) %102 = shl i32 13, 4 %103 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %102) %104 = shl i32 13, 4 %105 = add i32 %104, 4 %106 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %105) %107 = shl i32 13, 4 %108 = add i32 %107, 8 %109 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %108) %110 = shl i32 12, 4 %111 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %110) %112 = shl i32 12, 4 %113 = add i32 %112, 4 %114 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %113) %115 = shl i32 12, 4 %116 = add i32 %115, 8 %117 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %116) %118 = fmul float %111, %85 %119 = fmul float %114, %85 %120 = fmul float %117, %85 %121 = fmul float %103, %86 %122 = fadd float %121, %118 %123 = fmul float %106, %86 %124 = fadd float %123, %119 %125 = fmul float %109, %86 %126 = fadd float %125, %120 %127 = fmul float %95, %87 %128 = fadd float %127, %122 %129 = fmul float %98, %87 %130 = fadd float %129, %124 %131 = fmul float %101, %87 %132 = fadd float %131, %126 %133 = shl i32 15, 4 %134 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %133) %135 = shl i32 15, 4 %136 = add i32 %135, 4 %137 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %136) %138 = shl i32 15, 4 %139 = add i32 %138, 8 %140 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %139) %141 = fadd float %128, %134 %142 = fadd float %130, %137 %143 = fadd float %132, %140 %144 = shl i32 20, 4 %145 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %144) %146 = shl i32 20, 4 %147 = add i32 %146, 4 %148 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %147) %149 = shl i32 20, 4 %150 = add i32 %149, 8 %151 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %150) %152 = fsub float -0.000000e+00, %145 %153 = fadd float %141, %152 %154 = fsub float -0.000000e+00, %148 %155 = fadd float %142, %154 %156 = fsub float -0.000000e+00, %151 %157 = fadd float %143, %156 %158 = fmul float %153, %153 %159 = fmul float %155, %155 %160 = fadd float %159, %158 %161 = fmul float %157, %157 %162 = fadd float %160, %161 %163 = call float @llvm.AMDGPU.rsq(float %162) %164 = fmul float %163, %162 %165 = fsub float -0.000000e+00, %162 %166 = call float @llvm.AMDGPU.cndlt(float %165, float %164, float 0.000000e+00) %167 = fdiv float 1.000000e+00, %33 %168 = fmul float %166, %167 %169 = fadd float %168, 1.000000e+00 %170 = call float @llvm.log2.f32(float %169) %171 = fmul float %170, 0x3FE62E4300000000 %172 = call float @llvm.log2.f32(float %34) %173 = fmul float %172, 0x3FE62E4300000000 %174 = fdiv float 1.000000e+00, %173 %175 = fmul float %171, %174 %176 = call float @floor(float %175) %177 = call float @llvm.AMDIL.exp.(float %176) %178 = call float @llvm.AMDIL.fraction.(float %175) %179 = call float @fabs(float %91) %180 = call float @fabs(float %92) %181 = call float @fabs(float %93) %182 = fadd float %179, 0x3F50624DE0000000 %183 = fadd float %180, 0x3F50624DE0000000 %184 = fadd float %181, 0x3F50624DE0000000 %185 = call float @llvm.pow.f32(float %182, float %32) %186 = call float @llvm.pow.f32(float %183, float %32) %187 = call float @llvm.pow.f32(float %184, float %32) %188 = fadd float %185, %186 %189 = fadd float %188, %187 %190 = fdiv float 1.000000e+00, %189 %191 = fmul float %185, %190 %192 = fmul float %186, %190 %193 = fmul float %187, %190 %194 = fmul float %31, %177 %195 = fdiv float 1.000000e+00, %194 %196 = fmul float %90, %195 %197 = fmul float %89, %195 %198 = bitcast float %196 to i32 %199 = bitcast float %197 to i32 %200 = insertelement <2 x i32> undef, i32 %198, i32 0 %201 = insertelement <2 x i32> %200, i32 %199, i32 1 %202 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %201, <32 x i8> %67, <16 x i8> %69, i32 2) %203 = extractelement <4 x float> %202, i32 0 %204 = extractelement <4 x float> %202, i32 1 %205 = extractelement <4 x float> %202, i32 2 %206 = fmul float %30, %177 %207 = fdiv float 1.000000e+00, %206 %208 = fmul float %88, %207 %209 = fmul float %90, %207 %210 = bitcast float %208 to i32 %211 = bitcast float %209 to i32 %212 = insertelement <2 x i32> undef, i32 %210, i32 0 %213 = insertelement <2 x i32> %212, i32 %211, i32 1 %214 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %213, <32 x i8> %63, <16 x i8> %65, i32 2) %215 = extractelement <4 x float> %214, i32 0 %216 = extractelement <4 x float> %214, i32 1 %217 = extractelement <4 x float> %214, i32 2 %218 = fmul float %29, %177 %219 = fdiv float 1.000000e+00, %218 %220 = fmul float %88, %219 %221 = fmul float %89, %219 %222 = bitcast float %220 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %59, <16 x i8> %61, i32 2) %227 = extractelement <4 x float> %226, i32 0 %228 = extractelement <4 x float> %226, i32 1 %229 = extractelement <4 x float> %226, i32 2 %230 = fmul float %227, %193 %231 = fmul float %228, %193 %232 = fmul float %229, %193 %233 = fmul float %215, %192 %234 = fadd float %233, %230 %235 = fmul float %216, %192 %236 = fadd float %235, %231 %237 = fmul float %217, %192 %238 = fadd float %237, %232 %239 = fmul float %203, %191 %240 = fadd float %239, %234 %241 = fmul float %204, %191 %242 = fadd float %241, %236 %243 = fmul float %205, %191 %244 = fadd float %243, %238 %245 = fmul float %29, %177 %246 = fdiv float 1.000000e+00, %245 %247 = fmul float %88, %246 %248 = fmul float %89, %246 %249 = bitcast float %247 to i32 %250 = bitcast float %248 to i32 %251 = insertelement <2 x i32> undef, i32 %249, i32 0 %252 = insertelement <2 x i32> %251, i32 %250, i32 1 %253 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %252, <32 x i8> %71, <16 x i8> %73, i32 2) %254 = extractelement <4 x float> %253, i32 0 %255 = extractelement <4 x float> %253, i32 1 %256 = extractelement <4 x float> %253, i32 2 %257 = fmul float %254, 2.000000e+00 %258 = fadd float %257, 0xBFF0100000000000 %259 = fmul float %255, 2.000000e+00 %260 = fadd float %259, 0xBFF0100000000000 %261 = fmul float %30, %177 %262 = fdiv float 1.000000e+00, %261 %263 = fmul float %88, %262 %264 = fmul float %90, %262 %265 = bitcast float %263 to i32 %266 = bitcast float %264 to i32 %267 = insertelement <2 x i32> undef, i32 %265, i32 0 %268 = insertelement <2 x i32> %267, i32 %266, i32 1 %269 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %75, <16 x i8> %77, i32 2) %270 = extractelement <4 x float> %269, i32 0 %271 = extractelement <4 x float> %269, i32 1 %272 = extractelement <4 x float> %269, i32 2 %273 = fmul float %270, 2.000000e+00 %274 = fadd float %273, 0xBFF0100000000000 %275 = fmul float %271, 2.000000e+00 %276 = fadd float %275, 0xBFF0100000000000 %277 = fmul float %31, %177 %278 = fdiv float 1.000000e+00, %277 %279 = fmul float %90, %278 %280 = fmul float %89, %278 %281 = bitcast float %279 to i32 %282 = bitcast float %280 to i32 %283 = insertelement <2 x i32> undef, i32 %281, i32 0 %284 = insertelement <2 x i32> %283, i32 %282, i32 1 %285 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %284, <32 x i8> %79, <16 x i8> %81, i32 2) %286 = extractelement <4 x float> %285, i32 0 %287 = extractelement <4 x float> %285, i32 1 %288 = extractelement <4 x float> %285, i32 2 %289 = fmul float %286, 2.000000e+00 %290 = fadd float %289, 0xBFF0100000000000 %291 = fmul float %287, 2.000000e+00 %292 = fadd float %291, 0xBFF0100000000000 %293 = fadd float %179, %180 %294 = fadd float %293, %181 %295 = fdiv float 1.000000e+00, %294 %296 = fmul float %179, %295 %297 = fmul float %180, %295 %298 = fmul float %181, %295 %299 = fcmp ugt float %91, 0.000000e+00 %300 = select i1 %299, float 1.000000e+00, float %91 %301 = fcmp uge float %300, 0.000000e+00 %302 = select i1 %301, float %300, float -1.000000e+00 %303 = fmul float %288, %302 %304 = fmul float %292, %302 %305 = fmul float %290, %302 %306 = fcmp ugt float %92, 0.000000e+00 %307 = select i1 %306, float 1.000000e+00, float %92 %308 = fcmp uge float %307, 0.000000e+00 %309 = select i1 %308, float %307, float -1.000000e+00 %310 = fmul float %274, %309 %311 = fmul float %272, %309 %312 = fmul float %276, %309 %313 = fcmp ugt float %93, 0.000000e+00 %314 = select i1 %313, float 1.000000e+00, float %93 %315 = fcmp uge float %314, 0.000000e+00 %316 = select i1 %315, float %314, float -1.000000e+00 %317 = fmul float %258, %316 %318 = fmul float %260, %316 %319 = fmul float %256, %316 %320 = fmul float %317, %298 %321 = fmul float %318, %298 %322 = fmul float %319, %298 %323 = fmul float %310, %297 %324 = fadd float %323, %320 %325 = fmul float %311, %297 %326 = fadd float %325, %321 %327 = fmul float %312, %297 %328 = fadd float %327, %322 %329 = fmul float %303, %296 %330 = fadd float %329, %324 %331 = fmul float %304, %296 %332 = fadd float %331, %326 %333 = fmul float %305, %296 %334 = fadd float %333, %328 %335 = fcmp olt float %178, %35 %336 = sext i1 %335 to i32 %337 = bitcast i32 %336 to float %338 = bitcast float %337 to i32 %339 = icmp ne i32 %338, 0 br i1 %339, label %IF, label %ENDIF IF: ; preds = %main_body %340 = fmul float %177, 5.000000e-01 %341 = call float @fabs(float %91) %342 = call float @fabs(float %92) %343 = call float @fabs(float %93) %344 = fadd float %341, 0x3F50624DE0000000 %345 = fadd float %342, 0x3F50624DE0000000 %346 = fadd float %343, 0x3F50624DE0000000 %347 = call float @llvm.pow.f32(float %344, float %32) %348 = call float @llvm.pow.f32(float %345, float %32) %349 = call float @llvm.pow.f32(float %346, float %32) %350 = fadd float %347, %348 %351 = fadd float %350, %349 %352 = fdiv float 1.000000e+00, %351 %353 = fmul float %347, %352 %354 = fmul float %348, %352 %355 = fmul float %349, %352 %356 = fmul float %29, %340 %357 = fdiv float 1.000000e+00, %356 %358 = fmul float %88, %357 %359 = fmul float %89, %357 %360 = bitcast float %358 to i32 %361 = bitcast float %359 to i32 %362 = insertelement <2 x i32> undef, i32 %360, i32 0 %363 = insertelement <2 x i32> %362, i32 %361, i32 1 %364 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %363, <32 x i8> %71, <16 x i8> %73, i32 2) %365 = extractelement <4 x float> %364, i32 0 %366 = extractelement <4 x float> %364, i32 1 %367 = extractelement <4 x float> %364, i32 2 %368 = fmul float %365, 2.000000e+00 %369 = fadd float %368, 0xBFF0100000000000 %370 = fmul float %366, 2.000000e+00 %371 = fadd float %370, 0xBFF0100000000000 %372 = fmul float %30, %340 %373 = fdiv float 1.000000e+00, %372 %374 = fmul float %88, %373 %375 = fmul float %90, %373 %376 = bitcast float %374 to i32 %377 = bitcast float %375 to i32 %378 = insertelement <2 x i32> undef, i32 %376, i32 0 %379 = insertelement <2 x i32> %378, i32 %377, i32 1 %380 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %379, <32 x i8> %75, <16 x i8> %77, i32 2) %381 = extractelement <4 x float> %380, i32 0 %382 = extractelement <4 x float> %380, i32 1 %383 = extractelement <4 x float> %380, i32 2 %384 = fmul float %381, 2.000000e+00 %385 = fadd float %384, 0xBFF0100000000000 %386 = fmul float %382, 2.000000e+00 %387 = fadd float %386, 0xBFF0100000000000 %388 = fmul float %31, %340 %389 = fdiv float 1.000000e+00, %388 %390 = fmul float %90, %389 %391 = fmul float %89, %389 %392 = bitcast float %390 to i32 %393 = bitcast float %391 to i32 %394 = insertelement <2 x i32> undef, i32 %392, i32 0 %395 = insertelement <2 x i32> %394, i32 %393, i32 1 %396 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %395, <32 x i8> %79, <16 x i8> %81, i32 2) %397 = extractelement <4 x float> %396, i32 0 %398 = extractelement <4 x float> %396, i32 1 %399 = extractelement <4 x float> %396, i32 2 %400 = fmul float %397, 2.000000e+00 %401 = fadd float %400, 0xBFF0100000000000 %402 = fmul float %398, 2.000000e+00 %403 = fadd float %402, 0xBFF0100000000000 %404 = fadd float %341, %342 %405 = fadd float %404, %343 %406 = fdiv float 1.000000e+00, %405 %407 = fmul float %341, %406 %408 = fmul float %342, %406 %409 = fmul float %343, %406 %410 = fdiv float 1.000000e+00, %35 %411 = fmul float %178, %410 %412 = call float @llvm.AMDIL.clamp.(float %411, float 0.000000e+00, float 1.000000e+00) %413 = fmul float 2.000000e+00, %412 %414 = fsub float -0.000000e+00, %413 %415 = fadd float 3.000000e+00, %414 %416 = fmul float %412, %415 %417 = fmul float %412, %416 %418 = fmul float %31, %340 %419 = fdiv float 1.000000e+00, %418 %420 = fmul float %90, %419 %421 = fmul float %89, %419 %422 = bitcast float %420 to i32 %423 = bitcast float %421 to i32 %424 = insertelement <2 x i32> undef, i32 %422, i32 0 %425 = insertelement <2 x i32> %424, i32 %423, i32 1 %426 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %425, <32 x i8> %67, <16 x i8> %69, i32 2) %427 = extractelement <4 x float> %426, i32 0 %428 = extractelement <4 x float> %426, i32 1 %429 = extractelement <4 x float> %426, i32 2 %430 = fmul float %30, %340 %431 = fdiv float 1.000000e+00, %430 %432 = fmul float %88, %431 %433 = fmul float %90, %431 %434 = bitcast float %432 to i32 %435 = bitcast float %433 to i32 %436 = insertelement <2 x i32> undef, i32 %434, i32 0 %437 = insertelement <2 x i32> %436, i32 %435, i32 1 %438 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %437, <32 x i8> %63, <16 x i8> %65, i32 2) %439 = extractelement <4 x float> %438, i32 0 %440 = extractelement <4 x float> %438, i32 1 %441 = extractelement <4 x float> %438, i32 2 %442 = extractelement <4 x float> %438, i32 3 %443 = fmul float %29, %340 %444 = fdiv float 1.000000e+00, %443 %445 = fmul float %88, %444 %446 = fmul float %89, %444 %447 = bitcast float %445 to i32 %448 = bitcast float %446 to i32 %449 = insertelement <2 x i32> undef, i32 %447, i32 0 %450 = insertelement <2 x i32> %449, i32 %448, i32 1 %451 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %450, <32 x i8> %59, <16 x i8> %61, i32 2) %452 = extractelement <4 x float> %451, i32 0 %453 = extractelement <4 x float> %451, i32 1 %454 = extractelement <4 x float> %451, i32 2 %455 = fmul float %452, %355 %456 = fmul float %453, %355 %457 = fmul float %454, %355 %458 = fmul float %439, %354 %459 = fadd float %458, %455 %460 = fmul float %440, %354 %461 = fadd float %460, %456 %462 = fmul float %441, %354 %463 = fadd float %462, %457 %464 = fmul float %427, %353 %465 = fadd float %464, %459 %466 = fmul float %428, %353 %467 = fadd float %466, %461 %468 = fmul float %429, %353 %469 = fadd float %468, %463 %470 = call float @llvm.AMDGPU.lrp(float %417, float %240, float %465) %471 = call float @llvm.AMDGPU.lrp(float %417, float %242, float %467) %472 = call float @llvm.AMDGPU.lrp(float %417, float %244, float %469) %473 = fcmp ugt float %91, 0.000000e+00 %474 = select i1 %473, float 1.000000e+00, float %91 %475 = fcmp uge float %474, 0.000000e+00 %476 = select i1 %475, float %474, float -1.000000e+00 %477 = fmul float %399, %476 %478 = fmul float %403, %476 %479 = fmul float %401, %476 %480 = fcmp ugt float %92, 0.000000e+00 %481 = select i1 %480, float 1.000000e+00, float %92 %482 = fcmp uge float %481, 0.000000e+00 %483 = select i1 %482, float %481, float -1.000000e+00 %484 = fmul float %385, %483 %485 = fmul float %383, %483 %486 = fmul float %387, %483 %487 = fcmp ugt float %93, 0.000000e+00 %488 = select i1 %487, float 1.000000e+00, float %93 %489 = fcmp uge float %488, 0.000000e+00 %490 = select i1 %489, float %488, float -1.000000e+00 %491 = fmul float %369, %490 %492 = fmul float %371, %490 %493 = fmul float %367, %490 %494 = fmul float %491, %409 %495 = fmul float %492, %409 %496 = fmul float %493, %409 %497 = fmul float %484, %408 %498 = fadd float %497, %494 %499 = fmul float %485, %408 %500 = fadd float %499, %495 %501 = fmul float %486, %408 %502 = fadd float %501, %496 %503 = fmul float %477, %407 %504 = fadd float %503, %498 %505 = fmul float %478, %407 %506 = fadd float %505, %500 %507 = fmul float %479, %407 %508 = fadd float %507, %502 %509 = call float @llvm.AMDGPU.lrp(float %417, float %330, float %504) %510 = call float @llvm.AMDGPU.lrp(float %417, float %332, float %506) %511 = call float @llvm.AMDGPU.lrp(float %417, float %334, float %508) br label %ENDIF ENDIF: ; preds = %main_body, %IF %temp26.0 = phi float [ %511, %IF ], [ %334, %main_body ] %temp25.0 = phi float [ %510, %IF ], [ %332, %main_body ] %temp24.0 = phi float [ %509, %IF ], [ %330, %main_body ] %temp22.0 = phi float [ %472, %IF ], [ %244, %main_body ] %temp21.0 = phi float [ %471, %IF ], [ %242, %main_body ] %temp20.0 = phi float [ %470, %IF ], [ %240, %main_body ] %temp3.0 = phi float [ %442, %IF ], [ 0.000000e+00, %main_body ] %512 = fcmp olt float 5.000000e-01, %36 %513 = sext i1 %512 to i32 %514 = bitcast i32 %513 to float %515 = bitcast float %514 to i32 %516 = icmp ne i32 %515, 0 br i1 %516, label %IF191, label %ENDIF190 IF191: ; preds = %ENDIF %517 = fmul float %temp20.0, %82 %518 = fmul float %temp21.0, %83 %519 = fmul float %temp22.0, %84 br label %ENDIF190 ENDIF190: ; preds = %ENDIF, %IF191 %temp22.1 = phi float [ %519, %IF191 ], [ %temp22.0, %ENDIF ] %temp21.1 = phi float [ %518, %IF191 ], [ %temp21.0, %ENDIF ] %temp20.1 = phi float [ %517, %IF191 ], [ %temp20.0, %ENDIF ] %520 = fmul float %37, %temp24.0 %521 = fmul float %38, %temp24.0 %522 = fmul float %39, %temp24.0 %523 = fmul float %40, %temp25.0 %524 = fadd float %523, %520 %525 = fmul float %41, %temp25.0 %526 = fadd float %525, %521 %527 = fmul float %42, %temp25.0 %528 = fadd float %527, %522 %529 = fmul float %43, %temp26.0 %530 = fadd float %529, %524 %531 = fmul float %44, %temp26.0 %532 = fadd float %531, %526 %533 = fmul float %45, %temp26.0 %534 = fadd float %533, %528 %535 = fmul float %530, %530 %536 = fmul float %532, %532 %537 = fadd float %536, %535 %538 = fmul float %534, %534 %539 = fadd float %537, %538 %540 = call float @llvm.AMDGPU.rsq(float %539) %541 = fmul float %530, %540 %542 = fmul float %532, %540 %543 = fmul float %534, %540 %544 = shl i32 14, 4 %545 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %544) %546 = shl i32 14, 4 %547 = add i32 %546, 4 %548 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %547) %549 = shl i32 14, 4 %550 = add i32 %549, 8 %551 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %550) %552 = shl i32 13, 4 %553 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %552) %554 = shl i32 13, 4 %555 = add i32 %554, 4 %556 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %555) %557 = shl i32 13, 4 %558 = add i32 %557, 8 %559 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %558) %560 = shl i32 12, 4 %561 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %560) %562 = shl i32 12, 4 %563 = add i32 %562, 4 %564 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %563) %565 = shl i32 12, 4 %566 = add i32 %565, 8 %567 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %566) %568 = fmul float %561, %85 %569 = fmul float %564, %85 %570 = fmul float %567, %85 %571 = fmul float %553, %86 %572 = fadd float %571, %568 %573 = fmul float %556, %86 %574 = fadd float %573, %569 %575 = fmul float %559, %86 %576 = fadd float %575, %570 %577 = fmul float %545, %87 %578 = fadd float %577, %572 %579 = fmul float %548, %87 %580 = fadd float %579, %574 %581 = fmul float %551, %87 %582 = fadd float %581, %576 %583 = shl i32 15, 4 %584 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %583) %585 = shl i32 15, 4 %586 = add i32 %585, 4 %587 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %586) %588 = shl i32 15, 4 %589 = add i32 %588, 8 %590 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %589) %591 = fadd float %578, %584 %592 = fadd float %580, %587 %593 = fadd float %582, %590 %594 = shl i32 20, 4 %595 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %594) %596 = shl i32 20, 4 %597 = add i32 %596, 4 %598 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %597) %599 = shl i32 20, 4 %600 = add i32 %599, 8 %601 = call float @llvm.SI.load.const(<16 x i8> %47, i32 %600) %602 = fsub float -0.000000e+00, %595 %603 = fadd float %591, %602 %604 = fsub float -0.000000e+00, %598 %605 = fadd float %592, %604 %606 = fsub float -0.000000e+00, %601 %607 = fadd float %593, %606 %608 = fmul float %603, %603 %609 = fmul float %605, %605 %610 = fadd float %609, %608 %611 = fmul float %607, %607 %612 = fadd float %610, %611 %613 = call float @llvm.AMDGPU.rsq(float %612) %614 = fmul float %603, %613 %615 = fmul float %605, %613 %616 = fmul float %607, %613 %617 = fmul float %541, %614 %618 = fmul float %542, %615 %619 = fadd float %618, %617 %620 = fmul float %543, %616 %621 = fadd float %619, %620 %622 = fmul float %621, %541 %623 = fmul float %621, %542 %624 = fmul float %621, %543 %625 = fmul float 2.000000e+00, %622 %626 = fmul float 2.000000e+00, %623 %627 = fmul float 2.000000e+00, %624 %628 = fsub float -0.000000e+00, %625 %629 = fadd float %614, %628 %630 = fsub float -0.000000e+00, %626 %631 = fadd float %615, %630 %632 = fsub float -0.000000e+00, %627 %633 = fadd float %616, %632 %634 = fadd float %542, 0x3FF0CCCCC0000000 %635 = call float @llvm.AMDIL.clamp.(float %634, float 0.000000e+00, float 1.000000e+00) %636 = shl i32 14, 4 %637 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %636) %638 = shl i32 14, 4 %639 = add i32 %638, 4 %640 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %639) %641 = shl i32 14, 4 %642 = add i32 %641, 8 %643 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %642) %644 = fmul float %635, %637 %645 = fmul float %635, %640 %646 = fmul float %635, %643 %647 = fmul float %temp20.1, %644 %648 = fmul float %temp21.1, %645 %649 = fmul float %temp22.1, %646 %650 = shl i32 12, 4 %651 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %650) %652 = shl i32 12, 4 %653 = add i32 %652, 4 %654 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %653) %655 = shl i32 12, 4 %656 = add i32 %655, 8 %657 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %656) %658 = shl i32 13, 4 %659 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %658) %660 = shl i32 13, 4 %661 = add i32 %660, 4 %662 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %661) %663 = shl i32 13, 4 %664 = add i32 %663, 8 %665 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %664) %666 = fmul float %541, %651 %667 = fmul float %542, %654 %668 = fadd float %667, %666 %669 = fmul float %543, %657 %670 = fadd float %668, %669 %671 = fcmp uge float 0.000000e+00, %670 %672 = select i1 %671, float 0.000000e+00, float %670 %673 = fmul float %672, %659 %674 = fmul float %672, %662 %675 = fmul float %672, %665 %676 = fmul float %673, %temp20.1 %677 = fmul float %674, %temp21.1 %678 = fmul float %675, %temp22.1 %679 = fmul float %629, %651 %680 = fmul float %631, %654 %681 = fadd float %680, %679 %682 = fmul float %633, %657 %683 = fadd float %681, %682 %684 = fcmp uge float 0x3F50624DE0000000, %683 %685 = select i1 %684, float 0x3F50624DE0000000, float %683 %686 = call float @llvm.pow.f32(float %685, float %28) %687 = fmul float %686, %659 %688 = fmul float %686, %662 %689 = fmul float %686, %665 %690 = fmul float %687, %25 %691 = fadd float %690, %676 %692 = fmul float %688, %26 %693 = fadd float %692, %677 %694 = fmul float %689, %27 %695 = fadd float %694, %678 %696 = shl i32 15, 4 %697 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %696) %698 = shl i32 15, 4 %699 = add i32 %698, 4 %700 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %699) %701 = shl i32 15, 4 %702 = add i32 %701, 8 %703 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %702) %704 = fsub float -0.000000e+00, %697 %705 = fadd float %591, %704 %706 = fsub float -0.000000e+00, %700 %707 = fadd float %592, %706 %708 = fsub float -0.000000e+00, %703 %709 = fadd float %593, %708 %710 = fmul float %705, %705 %711 = fmul float %707, %707 %712 = fadd float %711, %710 %713 = fmul float %709, %709 %714 = fadd float %712, %713 %715 = call float @llvm.AMDGPU.rsq(float %714) %716 = fmul float %715, %714 %717 = fsub float -0.000000e+00, %714 %718 = call float @llvm.AMDGPU.cndlt(float %717, float %716, float 0.000000e+00) %719 = shl i32 16, 4 %720 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %719) %721 = fmul float 0x3FE99999A0000000, %720 %722 = fcmp olt float %718, %721 %723 = sext i1 %722 to i32 %724 = bitcast i32 %723 to float %725 = bitcast float %724 to i32 %726 = icmp ne i32 %725, 0 br i1 %726, label %IF226, label %ELSE227 IF226: ; preds = %ENDIF190 %727 = shl i32 6, 4 %728 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %727) %729 = shl i32 6, 4 %730 = add i32 %729, 4 %731 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %730) %732 = shl i32 6, 4 %733 = add i32 %732, 8 %734 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %733) %735 = shl i32 5, 4 %736 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %735) %737 = shl i32 5, 4 %738 = add i32 %737, 4 %739 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %738) %740 = shl i32 5, 4 %741 = add i32 %740, 8 %742 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %741) %743 = shl i32 4, 4 %744 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %743) %745 = shl i32 4, 4 %746 = add i32 %745, 4 %747 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %746) %748 = shl i32 4, 4 %749 = add i32 %748, 8 %750 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %749) %751 = fmul float %744, %591 %752 = fmul float %747, %591 %753 = fmul float %750, %591 %754 = fmul float %736, %592 %755 = fadd float %754, %751 %756 = fmul float %739, %592 %757 = fadd float %756, %752 %758 = fmul float %742, %592 %759 = fadd float %758, %753 %760 = fmul float %728, %593 %761 = fadd float %760, %755 %762 = fmul float %731, %593 %763 = fadd float %762, %757 %764 = fmul float %734, %593 %765 = fadd float %764, %759 %766 = shl i32 7, 4 %767 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %766) %768 = shl i32 7, 4 %769 = add i32 %768, 4 %770 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %769) %771 = shl i32 7, 4 %772 = add i32 %771, 8 %773 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %772) %774 = fadd float %761, %767 %775 = fadd float %763, %770 %776 = fadd float %765, %773 %777 = shl i32 15, 4 %778 = add i32 %777, 12 %779 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %778) %780 = fcmp uge float %776, -1.000000e+00 %781 = select i1 %780, float %776, float -1.000000e+00 %782 = fcmp uge float %781, 1.000000e+00 %783 = select i1 %782, float 1.000000e+00, float %781 %784 = fadd float %783, 1.000000e+00 %785 = fsub float -0.000000e+00, %779 %786 = fmul float %785, %784 %787 = fmul float 0x3FE7154760000000, %786 %788 = call float @llvm.AMDIL.exp.(float %787) %789 = fmul float %774, 5.000000e-01 %790 = fadd float %789, 5.000000e-01 %791 = fmul float %775, 5.000000e-01 %792 = fadd float %791, 5.000000e-01 %793 = shl i32 16, 4 %794 = add i32 %793, 8 %795 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %794) %796 = fmul float %790, %795 %797 = fmul float %792, %795 %798 = bitcast float %796 to i32 %799 = bitcast float %797 to i32 %800 = insertelement <2 x i32> undef, i32 %798, i32 0 %801 = insertelement <2 x i32> %800, i32 %799, i32 1 %802 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %801, <32 x i8> %51, <16 x i8> %53, i32 5) %803 = extractelement <4 x float> %802, i32 0 %804 = fmul float %788, %803 %805 = call float @llvm.AMDIL.clamp.(float %804, float 0.000000e+00, float 1.000000e+00) br label %ENDIF225 ELSE227: ; preds = %ENDIF190 %806 = shl i32 16, 4 %807 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %806) %808 = fcmp olt float %807, %718 %809 = sext i1 %808 to i32 %810 = bitcast i32 %809 to float %811 = bitcast float %810 to i32 %812 = icmp ne i32 %811, 0 br i1 %812, label %IF248, label %ELSE249 ENDIF225: ; preds = %IF248, %ELSE249, %IF226 %temp28.0 = phi float [ %805, %IF226 ], [ %946, %IF248 ], [ %1114, %ELSE249 ] %813 = fadd float %temp28.0, 0xBFE99999A0000000 %814 = fmul float %813, 0x4014000020000000 %815 = call float @llvm.AMDIL.clamp.(float %814, float 0.000000e+00, float 1.000000e+00) %816 = fmul float 2.000000e+00, %815 %817 = fsub float -0.000000e+00, %816 %818 = fadd float 3.000000e+00, %817 %819 = fmul float %815, %818 %820 = fmul float %815, %819 %821 = fmul float %691, %820 %822 = fmul float %693, %820 %823 = fmul float %695, %820 %824 = fadd float %647, %821 %825 = fadd float %648, %822 %826 = fadd float %649, %823 %827 = bitcast float %24 to i32 %828 = fsub float -0.000000e+00, %591 %829 = fsub float -0.000000e+00, %592 %830 = fsub float -0.000000e+00, %593 br label %LOOP IF248: ; preds = %ELSE227 %831 = shl i32 10, 4 %832 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %831) %833 = shl i32 10, 4 %834 = add i32 %833, 4 %835 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %834) %836 = shl i32 10, 4 %837 = add i32 %836, 8 %838 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %837) %839 = shl i32 9, 4 %840 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %839) %841 = shl i32 9, 4 %842 = add i32 %841, 4 %843 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %842) %844 = shl i32 9, 4 %845 = add i32 %844, 8 %846 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %845) %847 = shl i32 8, 4 %848 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %847) %849 = shl i32 8, 4 %850 = add i32 %849, 4 %851 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %850) %852 = shl i32 8, 4 %853 = add i32 %852, 8 %854 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %853) %855 = fmul float %848, %591 %856 = fmul float %851, %591 %857 = fmul float %854, %591 %858 = fmul float %840, %592 %859 = fadd float %858, %855 %860 = fmul float %843, %592 %861 = fadd float %860, %856 %862 = fmul float %846, %592 %863 = fadd float %862, %857 %864 = fmul float %832, %593 %865 = fadd float %864, %859 %866 = fmul float %835, %593 %867 = fadd float %866, %861 %868 = fmul float %838, %593 %869 = fadd float %868, %863 %870 = shl i32 11, 4 %871 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %870) %872 = shl i32 11, 4 %873 = add i32 %872, 4 %874 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %873) %875 = shl i32 11, 4 %876 = add i32 %875, 8 %877 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %876) %878 = fadd float %865, %871 %879 = fadd float %867, %874 %880 = fadd float %869, %877 %881 = shl i32 15, 4 %882 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %881) %883 = shl i32 15, 4 %884 = add i32 %883, 4 %885 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %884) %886 = shl i32 15, 4 %887 = add i32 %886, 8 %888 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %887) %889 = fsub float -0.000000e+00, %882 %890 = fadd float %591, %889 %891 = fsub float -0.000000e+00, %885 %892 = fadd float %592, %891 %893 = fsub float -0.000000e+00, %888 %894 = fadd float %593, %893 %895 = fmul float %890, %890 %896 = fmul float %892, %892 %897 = fadd float %896, %895 %898 = fmul float %894, %894 %899 = fadd float %897, %898 %900 = call float @llvm.AMDGPU.rsq(float %899) %901 = fmul float %900, %899 %902 = fsub float -0.000000e+00, %899 %903 = call float @llvm.AMDGPU.cndlt(float %902, float %901, float 0.000000e+00) %904 = shl i32 16, 4 %905 = add i32 %904, 4 %906 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %905) %907 = fdiv float 1.000000e+00, %906 %908 = fmul float %903, %907 %909 = fadd float %908, 0xBFE99999A0000000 %910 = fmul float %909, 0x4014000020000000 %911 = call float @llvm.AMDIL.clamp.(float %910, float 0.000000e+00, float 1.000000e+00) %912 = shl i32 15, 4 %913 = add i32 %912, 12 %914 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %913) %915 = fcmp uge float %880, -1.000000e+00 %916 = select i1 %915, float %880, float -1.000000e+00 %917 = fcmp uge float %916, 1.000000e+00 %918 = select i1 %917, float 1.000000e+00, float %916 %919 = fadd float %918, 1.000000e+00 %920 = fsub float -0.000000e+00, %914 %921 = fmul float %920, %919 %922 = fmul float 0x3FE7154760000000, %921 %923 = call float @llvm.AMDIL.exp.(float %922) %924 = fmul float %878, 5.000000e-01 %925 = fadd float %924, 5.000000e-01 %926 = fmul float %879, 5.000000e-01 %927 = fadd float %926, 5.000000e-01 %928 = shl i32 16, 4 %929 = add i32 %928, 8 %930 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %929) %931 = fmul float %925, %930 %932 = fmul float %927, %930 %933 = bitcast float %931 to i32 %934 = bitcast float %932 to i32 %935 = insertelement <2 x i32> undef, i32 %933, i32 0 %936 = insertelement <2 x i32> %935, i32 %934, i32 1 %937 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %936, <32 x i8> %55, <16 x i8> %57, i32 5) %938 = extractelement <4 x float> %937, i32 0 %939 = fmul float %923, %938 %940 = call float @llvm.AMDIL.clamp.(float %939, float 0.000000e+00, float 1.000000e+00) %941 = fmul float 2.000000e+00, %911 %942 = fsub float -0.000000e+00, %941 %943 = fadd float 3.000000e+00, %942 %944 = fmul float %911, %943 %945 = fmul float %911, %944 %946 = call float @llvm.AMDGPU.lrp(float %945, float 1.000000e+00, float %940) br label %ENDIF225 ELSE249: ; preds = %ELSE227 %947 = shl i32 6, 4 %948 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %947) %949 = shl i32 6, 4 %950 = add i32 %949, 4 %951 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %950) %952 = shl i32 6, 4 %953 = add i32 %952, 8 %954 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %953) %955 = shl i32 5, 4 %956 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %955) %957 = shl i32 5, 4 %958 = add i32 %957, 4 %959 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %958) %960 = shl i32 5, 4 %961 = add i32 %960, 8 %962 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %961) %963 = shl i32 4, 4 %964 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %963) %965 = shl i32 4, 4 %966 = add i32 %965, 4 %967 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %966) %968 = shl i32 4, 4 %969 = add i32 %968, 8 %970 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %969) %971 = fmul float %964, %591 %972 = fmul float %967, %591 %973 = fmul float %970, %591 %974 = fmul float %956, %592 %975 = fadd float %974, %971 %976 = fmul float %959, %592 %977 = fadd float %976, %972 %978 = fmul float %962, %592 %979 = fadd float %978, %973 %980 = fmul float %948, %593 %981 = fadd float %980, %975 %982 = fmul float %951, %593 %983 = fadd float %982, %977 %984 = fmul float %954, %593 %985 = fadd float %984, %979 %986 = shl i32 7, 4 %987 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %986) %988 = shl i32 7, 4 %989 = add i32 %988, 4 %990 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %989) %991 = shl i32 7, 4 %992 = add i32 %991, 8 %993 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %992) %994 = fadd float %981, %987 %995 = fadd float %983, %990 %996 = fadd float %985, %993 %997 = shl i32 10, 4 %998 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %997) %999 = shl i32 10, 4 %1000 = add i32 %999, 4 %1001 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1000) %1002 = shl i32 10, 4 %1003 = add i32 %1002, 8 %1004 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1003) %1005 = shl i32 9, 4 %1006 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1005) %1007 = shl i32 9, 4 %1008 = add i32 %1007, 4 %1009 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1008) %1010 = shl i32 9, 4 %1011 = add i32 %1010, 8 %1012 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1011) %1013 = shl i32 8, 4 %1014 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1013) %1015 = shl i32 8, 4 %1016 = add i32 %1015, 4 %1017 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1016) %1018 = shl i32 8, 4 %1019 = add i32 %1018, 8 %1020 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1019) %1021 = fmul float %1014, %591 %1022 = fmul float %1017, %591 %1023 = fmul float %1020, %591 %1024 = fmul float %1006, %592 %1025 = fadd float %1024, %1021 %1026 = fmul float %1009, %592 %1027 = fadd float %1026, %1022 %1028 = fmul float %1012, %592 %1029 = fadd float %1028, %1023 %1030 = fmul float %998, %593 %1031 = fadd float %1030, %1025 %1032 = fmul float %1001, %593 %1033 = fadd float %1032, %1027 %1034 = fmul float %1004, %593 %1035 = fadd float %1034, %1029 %1036 = shl i32 11, 4 %1037 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1036) %1038 = shl i32 11, 4 %1039 = add i32 %1038, 4 %1040 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1039) %1041 = shl i32 11, 4 %1042 = add i32 %1041, 8 %1043 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1042) %1044 = fadd float %1031, %1037 %1045 = fadd float %1033, %1040 %1046 = fadd float %1035, %1043 %1047 = shl i32 16, 4 %1048 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1047) %1049 = fmul float 0x3FE99999A0000000, %1048 %1050 = fsub float -0.000000e+00, %1049 %1051 = fadd float %718, %1050 %1052 = fsub float -0.000000e+00, %1049 %1053 = fadd float %1048, %1052 %1054 = fdiv float 1.000000e+00, %1053 %1055 = fmul float %1051, %1054 %1056 = call float @llvm.AMDIL.clamp.(float %1055, float 0.000000e+00, float 1.000000e+00) %1057 = shl i32 15, 4 %1058 = add i32 %1057, 12 %1059 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1058) %1060 = shl i32 16, 4 %1061 = add i32 %1060, 8 %1062 = call float @llvm.SI.load.const(<16 x i8> %49, i32 %1061) %1063 = fcmp uge float %996, -1.000000e+00 %1064 = select i1 %1063, float %996, float -1.000000e+00 %1065 = fcmp uge float %1064, 1.000000e+00 %1066 = select i1 %1065, float 1.000000e+00, float %1064 %1067 = fadd float %1066, 1.000000e+00 %1068 = fsub float -0.000000e+00, %1059 %1069 = fmul float %1068, %1067 %1070 = fmul float 0x3FE7154760000000, %1069 %1071 = call float @llvm.AMDIL.exp.(float %1070) %1072 = fmul float %994, 5.000000e-01 %1073 = fadd float %1072, 5.000000e-01 %1074 = fmul float %995, 5.000000e-01 %1075 = fadd float %1074, 5.000000e-01 %1076 = fmul float %1073, %1062 %1077 = fmul float %1075, %1062 %1078 = bitcast float %1076 to i32 %1079 = bitcast float %1077 to i32 %1080 = insertelement <2 x i32> undef, i32 %1078, i32 0 %1081 = insertelement <2 x i32> %1080, i32 %1079, i32 1 %1082 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1081, <32 x i8> %51, <16 x i8> %53, i32 5) %1083 = extractelement <4 x float> %1082, i32 0 %1084 = fmul float %1071, %1083 %1085 = call float @llvm.AMDIL.clamp.(float %1084, float 0.000000e+00, float 1.000000e+00) %1086 = fcmp uge float %1046, -1.000000e+00 %1087 = select i1 %1086, float %1046, float -1.000000e+00 %1088 = fcmp uge float %1087, 1.000000e+00 %1089 = select i1 %1088, float 1.000000e+00, float %1087 %1090 = fadd float %1089, 1.000000e+00 %1091 = fsub float -0.000000e+00, %1059 %1092 = fmul float %1091, %1090 %1093 = fmul float 0x3FE7154760000000, %1092 %1094 = call float @llvm.AMDIL.exp.(float %1093) %1095 = fmul float %1044, 5.000000e-01 %1096 = fadd float %1095, 5.000000e-01 %1097 = fmul float %1045, 5.000000e-01 %1098 = fadd float %1097, 5.000000e-01 %1099 = fmul float %1096, %1062 %1100 = fmul float %1098, %1062 %1101 = bitcast float %1099 to i32 %1102 = bitcast float %1100 to i32 %1103 = insertelement <2 x i32> undef, i32 %1101, i32 0 %1104 = insertelement <2 x i32> %1103, i32 %1102, i32 1 %1105 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1104, <32 x i8> %55, <16 x i8> %57, i32 5) %1106 = extractelement <4 x float> %1105, i32 0 %1107 = fmul float %1094, %1106 %1108 = call float @llvm.AMDIL.clamp.(float %1107, float 0.000000e+00, float 1.000000e+00) %1109 = fmul float 2.000000e+00, %1056 %1110 = fsub float -0.000000e+00, %1109 %1111 = fadd float 3.000000e+00, %1110 %1112 = fmul float %1056, %1111 %1113 = fmul float %1056, %1112 %1114 = call float @llvm.AMDGPU.lrp(float %1113, float %1108, float %1085) br label %ENDIF225 LOOP: ; preds = %ENDIF307, %ENDIF225 %temp38.0 = phi float [ %826, %ENDIF225 ], [ %1240, %ENDIF307 ] %temp37.0 = phi float [ %825, %ENDIF225 ], [ %1239, %ENDIF307 ] %temp36.0 = phi float [ %824, %ENDIF225 ], [ %1238, %ENDIF307 ] %temp.0 = phi float [ 0.000000e+00, %ENDIF225 ], [ %1243, %ENDIF307 ] %1115 = bitcast float %temp.0 to i32 %1116 = icmp sge i32 %1115, %827 %1117 = sext i1 %1116 to i32 %1118 = bitcast i32 %1117 to float %1119 = bitcast float %1118 to i32 %1120 = icmp ne i32 %1119, 0 br i1 %1120, label %IF308, label %ENDIF307 IF308: ; preds = %LOOP %temp36.0.lcssa = phi float [ %temp36.0, %LOOP ] %temp37.0.lcssa = phi float [ %temp37.0, %LOOP ] %temp38.0.lcssa = phi float [ %temp38.0, %LOOP ] %1121 = call i32 @llvm.SI.packf16(float %temp36.0.lcssa, float %temp37.0.lcssa) %1122 = bitcast i32 %1121 to float %1123 = call i32 @llvm.SI.packf16(float %temp38.0.lcssa, float %temp3.0) %1124 = bitcast i32 %1123 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1122, float %1124, float %1122, float %1124) ret void ENDIF307: ; preds = %LOOP %1125 = bitcast float %temp.0 to i32 %1126 = mul i32 %1125, 7 %1127 = bitcast i32 %1126 to float %1128 = bitcast float %1127 to i32 %1129 = shl i32 %1128, 4 %1130 = add i32 %1129, 64 %1131 = call float @llvm.SI.load.const(<16 x i8> %23, i32 %1130) %1132 = bitcast float %1127 to i32 %1133 = add i32 %1132, 1 %1134 = bitcast i32 %1133 to float %1135 = bitcast float %1134 to i32 %1136 = shl i32 %1135, 4 %1137 = add i32 %1136, 64 %1138 = call float @llvm.SI.load.const(<16 x i8> %23, i32 %1137) %1139 = bitcast float %1127 to i32 %1140 = add i32 %1139, 2 %1141 = bitcast i32 %1140 to float %1142 = bitcast float %1141 to i32 %1143 = shl i32 %1142, 4 %1144 = add i32 %1143, 64 %1145 = call float @llvm.SI.load.const(<16 x i8> %23, i32 %1144) %1146 = bitcast float %1127 to i32 %1147 = add i32 %1146, 3 %1148 = bitcast i32 %1147 to float %1149 = bitcast float %1148 to i32 %1150 = shl i32 %1149, 4 %1151 = add i32 %1150, 64 %1152 = call float @llvm.SI.load.const(<16 x i8> %23, i32 %1151) %1153 = bitcast float %1127 to i32 %1154 = add i32 %1153, 4 %1155 = bitcast i32 %1154 to float %1156 = bitcast float %1155 to i32 %1157 = shl i32 %1156, 4 %1158 = add i32 %1157, 64 %1159 = call float @llvm.SI.load.const(<16 x i8> %23, i32 %1158) %1160 = bitcast float %1127 to i32 %1161 = add i32 %1160, 5 %1162 = bitcast i32 %1161 to float %1163 = bitcast float %1162 to i32 %1164 = shl i32 %1163, 4 %1165 = add i32 %1164, 64 %1166 = call float @llvm.SI.load.const(<16 x i8> %23, i32 %1165) %1167 = bitcast float %1127 to i32 %1168 = add i32 %1167, 6 %1169 = bitcast i32 %1168 to float %1170 = bitcast float %1169 to i32 %1171 = shl i32 %1170, 4 %1172 = add i32 %1171, 64 %1173 = call float @llvm.SI.load.const(<16 x i8> %23, i32 %1172) %1174 = fadd float %1131, %828 %1175 = fadd float %1138, %829 %1176 = fadd float %1145, %830 %1177 = fmul float %1174, %1174 %1178 = fmul float %1175, %1175 %1179 = fadd float %1178, %1177 %1180 = fmul float %1176, %1176 %1181 = fadd float %1179, %1180 %1182 = call float @llvm.AMDGPU.rsq(float %1181) %1183 = fmul float %1182, %1181 %1184 = fsub float -0.000000e+00, %1181 %1185 = call float @llvm.AMDGPU.cndlt(float %1184, float %1183, float 0.000000e+00) %1186 = fdiv float 1.000000e+00, %1185 %1187 = fmul float %1174, %1186 %1188 = fmul float %1175, %1186 %1189 = fmul float %1176, %1186 %1190 = fmul float %541, %1187 %1191 = fmul float %542, %1188 %1192 = fadd float %1191, %1190 %1193 = fmul float %543, %1189 %1194 = fadd float %1192, %1193 %1195 = fcmp uge float 0.000000e+00, %1194 %1196 = select i1 %1195, float 0.000000e+00, float %1194 %1197 = fmul float %1196, %1152 %1198 = fmul float %1196, %1159 %1199 = fmul float %1196, %1166 %1200 = fmul float %1197, %temp20.1 %1201 = fmul float %1198, %temp21.1 %1202 = fmul float %1199, %temp22.1 %1203 = fmul float %629, %1187 %1204 = fmul float %631, %1188 %1205 = fadd float %1204, %1203 %1206 = fmul float %633, %1189 %1207 = fadd float %1205, %1206 %1208 = fcmp uge float 0x3F50624DE0000000, %1207 %1209 = select i1 %1208, float 0x3F50624DE0000000, float %1207 %1210 = call float @llvm.pow.f32(float %1209, float %28) %1211 = fmul float %1210, %1152 %1212 = fmul float %1210, %1159 %1213 = fmul float %1210, %1166 %1214 = fmul float %1211, %25 %1215 = fadd float %1214, %1200 %1216 = fmul float %1212, %26 %1217 = fadd float %1216, %1201 %1218 = fmul float %1213, %27 %1219 = fadd float %1218, %1202 %1220 = fmul float %1173, 5.000000e-01 %1221 = fsub float -0.000000e+00, %1220 %1222 = fadd float %1185, %1221 %1223 = fsub float -0.000000e+00, %1220 %1224 = fadd float %1173, %1223 %1225 = fdiv float 1.000000e+00, %1224 %1226 = fmul float %1222, %1225 %1227 = call float @llvm.AMDIL.clamp.(float %1226, float 0.000000e+00, float 1.000000e+00) %1228 = fmul float 2.000000e+00, %1227 %1229 = fsub float -0.000000e+00, %1228 %1230 = fadd float 3.000000e+00, %1229 %1231 = fmul float %1227, %1230 %1232 = fmul float %1227, %1231 %1233 = fsub float -0.000000e+00, %1232 %1234 = fadd float 1.000000e+00, %1233 %1235 = fmul float %1215, %1234 %1236 = fmul float %1217, %1234 %1237 = fmul float %1219, %1234 %1238 = fadd float %temp36.0, %1235 %1239 = fadd float %temp37.0, %1236 %1240 = fadd float %temp38.0, %1237 %1241 = bitcast float %temp.0 to i32 %1242 = add i32 %1241, 1 %1243 = bitcast i32 %1242 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.rsq(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readonly declare float @floor(float) #4 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readonly declare float @fabs(float) #4 ; Function Attrs: nounwind readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readonly } attributes #4 = { readonly } !0 = metadata !{metadata !"const", null, i32 1} Stacktrace: Native stacktrace: /usr/lib/libmono-2.0.so.1(+0xd4335) [0x7fb5c7772335] /usr/lib/libmono-2.0.so.1(+0x12f27b) [0x7fb5c77cd27b] /usr/lib/libmono-2.0.so.1(+0x3f4b2) [0x7fb5c76dd4b2] /usr/lib/libpthread.so.0(+0xf880) [0x7fb5ca462880] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm9LiveRange4findENS_9SlotIndexE+0x19) [0x7fb5b94e4291] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm13LiveIntervals12shrinkToUsesEPNS_12LiveIntervalEPNS_15SmallVectorImplIPNS_12MachineInstrEEE+0x223) [0x7fb5b94e9f23] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm13LiveRangeEdit17eliminateDeadDefsERNS_15SmallVectorImplIPNS_12MachineInstrEEENS_8ArrayRefIjEE+0x156) [0x7fb5b94f67fe] /usr/lib/libLLVM-3.5svn.so(+0xbcaadf) [0x7fb5b94cdadf] /usr/lib/libLLVM-3.5svn.so(+0xbcad37) [0x7fb5b94cdd37] /usr/lib/libLLVM-3.5svn.so(+0xcaca7b) [0x7fb5b95afa7b] /usr/lib/libLLVM-3.5svn.so(+0xcacc17) [0x7fb5b95afc17] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm12RegAllocBase16allocatePhysRegsEv+0x188) [0x7fb5b95a1542] /usr/lib/libLLVM-3.5svn.so(+0xcaf0d7) [0x7fb5b95b20d7] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm19MachineFunctionPass13runOnFunctionERNS_8FunctionE+0x2a) [0x7fb5b953863e] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm13FPPassManager13runOnFunctionERNS_8FunctionE+0x128) [0x7fb5b8f19b98] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm13FPPassManager11runOnModuleERNS_6ModuleE+0x27) [0x7fb5b8f19eef] /usr/lib/libLLVM-3.5svn.so(+0x617036) [0x7fb5b8f1a036] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm6legacy15PassManagerImpl3runERNS_6ModuleE+0x8b) [0x7fb5b8f1a1ef] /usr/lib/libLLVM-3.5svn.so(_ZN4llvm6legacy11PassManager3runERNS_6ModuleE+0xd) [0x7fb5b8f1a253] /usr/lib/libLLVM-3.5svn.so(+0xe29909) [0x7fb5b972c909] /usr/lib/libLLVM-3.5svn.so(LLVMTargetMachineEmitToMemoryBuffer+0x194) [0x7fb5b972ce24] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x325ea2) [0x7fb5bb020ea2] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x312c47) [0x7fb5bb00dc47] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x313681) [0x7fb5bb00e681] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x318652) [0x7fb5bb013652] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x318742) [0x7fb5bb013742] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x318788) [0x7fb5bb013788] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x171e65) [0x7fb5bae6ce65] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x172cbf) [0x7fb5bae6dcbf] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x142b6b) [0x7fb5bae3db6b] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x13fdeb) [0x7fb5bae3adeb] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x146da9) [0x7fb5bae41da9] /usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x615ae) [0x7fb5bad5c5ae] /home/chris/build/UpvoidLauncher/bin/Engine/Libs/libUpvoidEngineLib.so(_ZN9rendering16TransparentStage6renderEv+0x20c) [0x7fb5caaf2eac] /home/chris/build/UpvoidLauncher/bin/Engine/Libs/libUpvoidEngineLib.so(_ZN9rendering17RenderingPipeline6renderEv+0x751) [0x7fb5caaa66e1] /home/chris/build/UpvoidLauncher/bin/Engine/Libs/libUpvoidEngineLib.so(_ZN9rendering12RenderEngine6renderEd+0x105c) [0x7fb5cab0b9fc] /home/chris/build/UpvoidLauncher/bin/Engine/Libs/libUpvoidEngineLib.so(_ZN9threading12WorkerThread4Mode7triggerEPS0_d+0x35) [0x7fb5cabbf535] /home/chris/build/UpvoidLauncher/bin/Engine/Libs/libUpvoidEngineLib.so(_ZN9threading12WorkerThread6updateEv+0x160) [0x7fb5cabc0280] /home/chris/build/UpvoidLauncher/bin/Engine/Libs/libUpvoidEngineLib.so(_ZN9threading12ThreadEngine13callUiThreadsEv+0x2aa) [0x7fb5cabc224a] /home/chris/build/UpvoidLauncher/bin/Engine/Libs/libUpvoidEngineLib.so(_ZN6Engine13callUiThreadsEv+0xeb) [0x7fb5cad28b0b] ./UpvoidEngine(main+0xb5) [0x4022e5] /usr/lib/libc.so.6(__libc_start_main+0xf5) [0x7fb5c94e5b05] ./UpvoidEngine() [0x401db9] Debug info from gdb: ptrace: Die Operation ist nicht erlaubt. No threads. ================================================================= Got a SIGSEGV while executing native code. This usually indicates a fatal error in the mono runtime or one of the native libraries used by your application. =================================================================